[Openmp-commits] [openmp] d3e7491 - Revert Attributor patch series

Nico Weber via Openmp-commits openmp-commits at lists.llvm.org
Sat Jul 10 13:19:58 PDT 2021


Author: Nico Weber
Date: 2021-07-10T16:15:55-04:00
New Revision: d3e749133319aaea6a143b1404154345a3cc2541

URL: https://github.com/llvm/llvm-project/commit/d3e749133319aaea6a143b1404154345a3cc2541
DIFF: https://github.com/llvm/llvm-project/commit/d3e749133319aaea6a143b1404154345a3cc2541.diff

LOG: Revert Attributor patch series

Broke check-clang, see https://reviews.llvm.org/D102307#2869065
Ran `git revert -n ebbe149a6f08535ede848a531a601ae6591cfbc5..269416d41908bb670f67af689155d5ab8eea689a`

Added: 
    

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
    clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
    llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
    llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
    llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
    llvm/include/llvm/Transforms/IPO/Attributor.h
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
    llvm/lib/IR/Assumptions.cpp
    llvm/lib/Transforms/IPO/Attributor.cpp
    llvm/lib/Transforms/IPO/AttributorAttributes.cpp
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp
    llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll
    llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
    llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll
    llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll
    llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll
    llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll
    llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll
    llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll
    llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
    llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll
    llvm/test/Transforms/Attributor/align.ll
    llvm/test/Transforms/Attributor/cb_liveness_disabled.ll
    llvm/test/Transforms/Attributor/cb_liveness_enabled.ll
    llvm/test/Transforms/Attributor/cb_range_enabled.ll
    llvm/test/Transforms/Attributor/cgscc_bugs.ll
    llvm/test/Transforms/Attributor/depgraph.ll
    llvm/test/Transforms/Attributor/dereferenceable-2-inseltpoison.ll
    llvm/test/Transforms/Attributor/dereferenceable-2.ll
    llvm/test/Transforms/Attributor/heap_to_stack.ll
    llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
    llvm/test/Transforms/Attributor/internal-noalias.ll
    llvm/test/Transforms/Attributor/internalize.ll
    llvm/test/Transforms/Attributor/liveness.ll
    llvm/test/Transforms/Attributor/lvi-for-ashr.ll
    llvm/test/Transforms/Attributor/memory_locations.ll
    llvm/test/Transforms/Attributor/noalias.ll
    llvm/test/Transforms/Attributor/nocapture-1.ll
    llvm/test/Transforms/Attributor/nocapture-2.ll
    llvm/test/Transforms/Attributor/nodelete.ll
    llvm/test/Transforms/Attributor/nonnull.ll
    llvm/test/Transforms/Attributor/norecurse.ll
    llvm/test/Transforms/Attributor/noundef.ll
    llvm/test/Transforms/Attributor/potential.ll
    llvm/test/Transforms/Attributor/range.ll
    llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll
    llvm/test/Transforms/Attributor/readattrs.ll
    llvm/test/Transforms/Attributor/returned.ll
    llvm/test/Transforms/Attributor/undefined_behavior.ll
    llvm/test/Transforms/Attributor/value-simplify.ll
    llvm/test/Transforms/OpenMP/globalization_remarks.ll
    llvm/test/Transforms/OpenMP/remove_globalization.ll
    llvm/test/Transforms/OpenMP/replace_globalization.ll
    llvm/test/Transforms/OpenMP/single_threaded_execution.ll
    openmp/libomptarget/deviceRTLs/common/src/loop.cu
    openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
    openmp/libomptarget/deviceRTLs/common/src/parallel.cu
    openmp/libomptarget/deviceRTLs/common/src/reduction.cu
    openmp/libomptarget/deviceRTLs/common/src/support.cu
    openmp/libomptarget/deviceRTLs/common/src/sync.cu
    openmp/libomptarget/deviceRTLs/common/src/task.cu
    openmp/libomptarget/deviceRTLs/common/support.h
    openmp/libomptarget/deviceRTLs/interface.h
    openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu

Removed: 
    llvm/test/Transforms/OpenMP/custom_state_machines.ll
    llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
    llvm/test/Transforms/OpenMP/spmdization.ll
    llvm/test/Transforms/OpenMP/spmdization_remarks.ll
    openmp/libomptarget/deviceRTLs/common/include/target.h


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 1cb367ec7188..965b3f1534d6 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -553,6 +553,63 @@ static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
                        "nvptx_lane_id");
 }
 
+/// Get the value of the thread_limit clause in the teams directive.
+/// For the 'generic' execution mode, the runtime encodes thread_limit in
+/// the launch parameters, always starting thread_limit+warpSize threads per
+/// CTA. The threads in the last warp are reserved for master execution.
+/// For the 'spmd' execution mode, all threads in a CTA are part of the team.
+static llvm::Value *getThreadLimit(CodeGenFunction &CGF,
+                                   bool IsInSPMDExecutionMode = false) {
+  CGBuilderTy &Bld = CGF.Builder;
+  auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
+  llvm::Value *ThreadLimit = nullptr;
+  if (IsInSPMDExecutionMode)
+    ThreadLimit = RT.getGPUNumThreads(CGF);
+  else {
+    llvm::Value *GPUNumThreads = RT.getGPUNumThreads(CGF);
+    llvm::Value *GPUWarpSize = RT.getGPUWarpSize(CGF);
+    ThreadLimit = Bld.CreateNUWSub(GPUNumThreads, GPUWarpSize, "thread_limit");
+  }
+  assert(ThreadLimit != nullptr && "Expected non-null ThreadLimit");
+  return ThreadLimit;
+}
+
+/// Get the thread id of the OMP master thread.
+/// The master thread id is the first thread (lane) of the last warp in the
+/// GPU block.  Warp size is assumed to be some power of 2.
+/// Thread id is 0 indexed.
+/// E.g: If NumThreads is 33, master id is 32.
+///      If NumThreads is 64, master id is 32.
+///      If NumThreads is 1024, master id is 992.
+static llvm::Value *getMasterThreadID(CodeGenFunction &CGF) {
+  CGBuilderTy &Bld = CGF.Builder;
+  auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
+  llvm::Value *NumThreads = RT.getGPUNumThreads(CGF);
+  // We assume that the warp size is a power of 2.
+  llvm::Value *Mask = Bld.CreateNUWSub(RT.getGPUWarpSize(CGF), Bld.getInt32(1));
+
+  llvm::Value *NumThreadsSubOne = Bld.CreateNUWSub(NumThreads, Bld.getInt32(1));
+  return Bld.CreateAnd(NumThreadsSubOne, Bld.CreateNot(Mask), "master_tid");
+}
+
+CGOpenMPRuntimeGPU::WorkerFunctionState::WorkerFunctionState(
+    CodeGenModule &CGM, SourceLocation Loc)
+    : WorkerFn(nullptr), CGFI(CGM.getTypes().arrangeNullaryFunction()),
+      Loc(Loc) {
+  createWorkerFunction(CGM);
+}
+
+void CGOpenMPRuntimeGPU::WorkerFunctionState::createWorkerFunction(
+    CodeGenModule &CGM) {
+  // Create an worker function with no arguments.
+
+  WorkerFn = llvm::Function::Create(
+      CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+      /*placeholder=*/"_worker", &CGM.getModule());
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), WorkerFn, CGFI);
+  WorkerFn->setDoesNotRecurse();
+}
+
 CGOpenMPRuntimeGPU::ExecutionMode
 CGOpenMPRuntimeGPU::getExecutionMode() const {
   return CurrentExecutionMode;
@@ -1016,19 +1073,23 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
                                              const RegionCodeGenTy &CodeGen) {
   ExecutionRuntimeModesRAII ModeRAII(CurrentExecutionMode);
   EntryFunctionState EST;
+  WorkerFunctionState WST(CGM, D.getBeginLoc());
+  Work.clear();
   WrapperFunctionsMap.clear();
 
   // Emit target region as a standalone region.
   class NVPTXPrePostActionTy : public PrePostActionTy {
     CGOpenMPRuntimeGPU::EntryFunctionState &EST;
+    CGOpenMPRuntimeGPU::WorkerFunctionState &WST;
 
   public:
-    NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST)
-        : EST(EST) {}
+    NVPTXPrePostActionTy(CGOpenMPRuntimeGPU::EntryFunctionState &EST,
+                         CGOpenMPRuntimeGPU::WorkerFunctionState &WST)
+        : EST(EST), WST(WST) {}
     void Enter(CodeGenFunction &CGF) override {
       auto &RT =
           static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
-      RT.emitKernelInit(CGF, EST, /* IsSPMD */ false);
+      RT.emitNonSPMDEntryHeader(CGF, EST, WST);
       // Skip target region initialization.
       RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
     }
@@ -1036,33 +1097,93 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D,
       auto &RT =
           static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
       RT.clearLocThreadIdInsertPt(CGF);
-      RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ false);
+      RT.emitNonSPMDEntryFooter(CGF, EST);
     }
-  } Action(EST);
+  } Action(EST, WST);
   CodeGen.setAction(Action);
   IsInTTDRegion = true;
   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                    IsOffloadEntry, CodeGen);
   IsInTTDRegion = false;
+
+  // Now change the name of the worker function to correspond to this target
+  // region's entry function.
+  WST.WorkerFn->setName(Twine(OutlinedFn->getName(), "_worker"));
+
+  // Create the worker function
+  emitWorkerFunction(WST);
 }
 
-void CGOpenMPRuntimeGPU::emitKernelInit(CodeGenFunction &CGF,
-                                        EntryFunctionState &EST, bool IsSPMD) {
+// Setup NVPTX threads for master-worker OpenMP scheme.
+void CGOpenMPRuntimeGPU::emitNonSPMDEntryHeader(CodeGenFunction &CGF,
+                                                  EntryFunctionState &EST,
+                                                  WorkerFunctionState &WST) {
   CGBuilderTy &Bld = CGF.Builder;
-  Bld.restoreIP(OMPBuilder.createTargetInit(Bld, IsSPMD, requiresFullRuntime()));
-  IsInTargetMasterThreadRegion = IsSPMD;
-  if (!IsSPMD)
-    emitGenericVarsProlog(CGF, EST.Loc);
+
+  llvm::BasicBlock *WorkerBB = CGF.createBasicBlock(".worker");
+  llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
+  llvm::BasicBlock *MasterBB = CGF.createBasicBlock(".master");
+  EST.ExitBB = CGF.createBasicBlock(".exit");
+
+  auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
+  llvm::Value *GPUThreadID = RT.getGPUThreadID(CGF);
+  llvm::Value *ThreadLimit = getThreadLimit(CGF);
+  llvm::Value *IsWorker = Bld.CreateICmpULT(GPUThreadID, ThreadLimit);
+  Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
+
+  CGF.EmitBlock(WorkerBB);
+  emitCall(CGF, WST.Loc, WST.WorkerFn);
+  CGF.EmitBranch(EST.ExitBB);
+
+  CGF.EmitBlock(MasterCheckBB);
+  GPUThreadID = RT.getGPUThreadID(CGF);
+  llvm::Value *MasterThreadID = getMasterThreadID(CGF);
+  llvm::Value *IsMaster = Bld.CreateICmpEQ(GPUThreadID, MasterThreadID);
+  Bld.CreateCondBr(IsMaster, MasterBB, EST.ExitBB);
+
+  CGF.EmitBlock(MasterBB);
+  IsInTargetMasterThreadRegion = true;
+  // SEQUENTIAL (MASTER) REGION START
+  // First action in sequential region:
+  // Initialize the state of the OpenMP runtime library on the GPU.
+  // TODO: Optimize runtime initialization and pass in correct value.
+  llvm::Value *Args[] = {getThreadLimit(CGF),
+                         Bld.getInt16(/*RequiresOMPRuntime=*/1)};
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                          CGM.getModule(), OMPRTL___kmpc_kernel_init),
+                      Args);
+
+  emitGenericVarsProlog(CGF, WST.Loc);
 }
 
-void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF,
-                                          EntryFunctionState &EST,
-                                          bool IsSPMD) {
-  if (!IsSPMD)
-    emitGenericVarsEpilog(CGF);
+void CGOpenMPRuntimeGPU::emitNonSPMDEntryFooter(CodeGenFunction &CGF,
+                                                  EntryFunctionState &EST) {
+  IsInTargetMasterThreadRegion = false;
+  if (!CGF.HaveInsertPoint())
+    return;
+
+  emitGenericVarsEpilog(CGF);
 
-  CGBuilderTy &Bld = CGF.Builder;
-  OMPBuilder.createTargetDeinit(Bld, IsSPMD, requiresFullRuntime());
+  if (!EST.ExitBB)
+    EST.ExitBB = CGF.createBasicBlock(".exit");
+
+  llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".termination.notifier");
+  CGF.EmitBranch(TerminateBB);
+
+  CGF.EmitBlock(TerminateBB);
+  // Signal termination condition.
+  // TODO: Optimize runtime initialization and pass in correct value.
+  llvm::Value *Args[] = {CGF.Builder.getInt16(/*IsOMPRuntimeInitialized=*/1)};
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                          CGM.getModule(), OMPRTL___kmpc_kernel_deinit),
+                      Args);
+  // Barrier to terminate worker threads.
+  syncCTAThreads(CGF);
+  // Master thread jumps to exit point.
+  CGF.EmitBranch(EST.ExitBB);
+
+  CGF.EmitBlock(EST.ExitBB);
+  EST.ExitBB = nullptr;
 }
 
 void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
@@ -1081,21 +1202,23 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
   class NVPTXPrePostActionTy : public PrePostActionTy {
     CGOpenMPRuntimeGPU &RT;
     CGOpenMPRuntimeGPU::EntryFunctionState &EST;
+    const OMPExecutableDirective &D;
 
   public:
     NVPTXPrePostActionTy(CGOpenMPRuntimeGPU &RT,
-                         CGOpenMPRuntimeGPU::EntryFunctionState &EST)
-        : RT(RT), EST(EST) {}
+                         CGOpenMPRuntimeGPU::EntryFunctionState &EST,
+                         const OMPExecutableDirective &D)
+        : RT(RT), EST(EST), D(D) {}
     void Enter(CodeGenFunction &CGF) override {
-      RT.emitKernelInit(CGF, EST, /* IsSPMD */ true);
+      RT.emitSPMDEntryHeader(CGF, EST, D);
       // Skip target region initialization.
       RT.setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
     }
     void Exit(CodeGenFunction &CGF) override {
       RT.clearLocThreadIdInsertPt(CGF);
-      RT.emitKernelDeinit(CGF, EST, /* IsSPMD */ true);
+      RT.emitSPMDEntryFooter(CGF, EST);
     }
-  } Action(*this, EST);
+  } Action(*this, EST, D);
   CodeGen.setAction(Action);
   IsInTTDRegion = true;
   emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
@@ -1103,6 +1226,54 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D,
   IsInTTDRegion = false;
 }
 
+void CGOpenMPRuntimeGPU::emitSPMDEntryHeader(
+    CodeGenFunction &CGF, EntryFunctionState &EST,
+    const OMPExecutableDirective &D) {
+  CGBuilderTy &Bld = CGF.Builder;
+
+  // Setup BBs in entry function.
+  llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute");
+  EST.ExitBB = CGF.createBasicBlock(".exit");
+
+  llvm::Value *Args[] = {getThreadLimit(CGF, /*IsInSPMDExecutionMode=*/true),
+                         /*RequiresOMPRuntime=*/
+                         Bld.getInt16(RequiresFullRuntime ? 1 : 0)};
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                          CGM.getModule(), OMPRTL___kmpc_spmd_kernel_init),
+                      Args);
+
+  CGF.EmitBranch(ExecuteBB);
+
+  CGF.EmitBlock(ExecuteBB);
+
+  IsInTargetMasterThreadRegion = true;
+}
+
+void CGOpenMPRuntimeGPU::emitSPMDEntryFooter(CodeGenFunction &CGF,
+                                               EntryFunctionState &EST) {
+  IsInTargetMasterThreadRegion = false;
+  if (!CGF.HaveInsertPoint())
+    return;
+
+  if (!EST.ExitBB)
+    EST.ExitBB = CGF.createBasicBlock(".exit");
+
+  llvm::BasicBlock *OMPDeInitBB = CGF.createBasicBlock(".omp.deinit");
+  CGF.EmitBranch(OMPDeInitBB);
+
+  CGF.EmitBlock(OMPDeInitBB);
+  // DeInitialize the OMP state in the runtime; called by all active threads.
+  llvm::Value *Args[] = {/*RequiresOMPRuntime=*/
+                         CGF.Builder.getInt16(RequiresFullRuntime ? 1 : 0)};
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                          CGM.getModule(), OMPRTL___kmpc_spmd_kernel_deinit_v2),
+                      Args);
+  CGF.EmitBranch(EST.ExitBB);
+
+  CGF.EmitBlock(EST.ExitBB);
+  EST.ExitBB = nullptr;
+}
+
 // Create a unique global variable to indicate the execution mode of this target
 // region. The execution mode is either 'generic', or 'spmd' depending on the
 // target directive. This variable is picked up by the offload library to setup
@@ -1119,6 +1290,137 @@ static void setPropertyExecutionMode(CodeGenModule &CGM, StringRef Name,
   CGM.addCompilerUsedGlobal(GVMode);
 }
 
+void CGOpenMPRuntimeGPU::emitWorkerFunction(WorkerFunctionState &WST) {
+  ASTContext &Ctx = CGM.getContext();
+
+  CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
+  CGF.StartFunction(GlobalDecl(), Ctx.VoidTy, WST.WorkerFn, WST.CGFI, {},
+                    WST.Loc, WST.Loc);
+  emitWorkerLoop(CGF, WST);
+  CGF.FinishFunction();
+}
+
+void CGOpenMPRuntimeGPU::emitWorkerLoop(CodeGenFunction &CGF,
+                                        WorkerFunctionState &WST) {
+  //
+  // The workers enter this loop and wait for parallel work from the master.
+  // When the master encounters a parallel region it sets up the work + variable
+  // arguments, and wakes up the workers.  The workers first check to see if
+  // they are required for the parallel region, i.e., within the # of requested
+  // parallel threads.  The activated workers load the variable arguments and
+  // execute the parallel work.
+  //
+
+  CGBuilderTy &Bld = CGF.Builder;
+
+  llvm::BasicBlock *AwaitBB = CGF.createBasicBlock(".await.work");
+  llvm::BasicBlock *SelectWorkersBB = CGF.createBasicBlock(".select.workers");
+  llvm::BasicBlock *ExecuteBB = CGF.createBasicBlock(".execute.parallel");
+  llvm::BasicBlock *TerminateBB = CGF.createBasicBlock(".terminate.parallel");
+  llvm::BasicBlock *BarrierBB = CGF.createBasicBlock(".barrier.parallel");
+  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
+
+  CGF.EmitBranch(AwaitBB);
+
+  // Workers wait for work from master.
+  CGF.EmitBlock(AwaitBB);
+  // Wait for parallel work
+  syncCTAThreads(CGF);
+
+  Address WorkFn =
+      CGF.CreateDefaultAlignTempAlloca(CGF.Int8PtrTy, /*Name=*/"work_fn");
+  Address ExecStatus =
+      CGF.CreateDefaultAlignTempAlloca(CGF.Int8Ty, /*Name=*/"exec_status");
+  CGF.InitTempAlloca(ExecStatus, Bld.getInt8(/*C=*/0));
+  CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy));
+
+  // TODO: Optimize runtime initialization and pass in correct value.
+  llvm::Value *Args[] = {WorkFn.getPointer()};
+  llvm::Value *Ret =
+      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                              CGM.getModule(), OMPRTL___kmpc_kernel_parallel),
+                          Args);
+  Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus);
+
+  // On termination condition (workid == 0), exit loop.
+  llvm::Value *WorkID = Bld.CreateLoad(WorkFn);
+  llvm::Value *ShouldTerminate = Bld.CreateIsNull(WorkID, "should_terminate");
+  Bld.CreateCondBr(ShouldTerminate, ExitBB, SelectWorkersBB);
+
+  // Activate requested workers.
+  CGF.EmitBlock(SelectWorkersBB);
+  llvm::Value *IsActive =
+      Bld.CreateIsNotNull(Bld.CreateLoad(ExecStatus), "is_active");
+  Bld.CreateCondBr(IsActive, ExecuteBB, BarrierBB);
+
+  // Signal start of parallel region.
+  CGF.EmitBlock(ExecuteBB);
+  // Skip initialization.
+  setLocThreadIdInsertPt(CGF, /*AtCurrentPoint=*/true);
+
+  // Process work items: outlined parallel functions.
+  for (llvm::Function *W : Work) {
+    // Try to match this outlined function.
+    llvm::Value *ID = Bld.CreatePointerBitCastOrAddrSpaceCast(W, CGM.Int8PtrTy);
+
+    llvm::Value *WorkFnMatch =
+        Bld.CreateICmpEQ(Bld.CreateLoad(WorkFn), ID, "work_match");
+
+    llvm::BasicBlock *ExecuteFNBB = CGF.createBasicBlock(".execute.fn");
+    llvm::BasicBlock *CheckNextBB = CGF.createBasicBlock(".check.next");
+    Bld.CreateCondBr(WorkFnMatch, ExecuteFNBB, CheckNextBB);
+
+    // Execute this outlined function.
+    CGF.EmitBlock(ExecuteFNBB);
+
+    // Insert call to work function via shared wrapper. The shared
+    // wrapper takes two arguments:
+    //   - the parallelism level;
+    //   - the thread ID;
+    emitCall(CGF, WST.Loc, W,
+             {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)});
+
+    // Go to end of parallel region.
+    CGF.EmitBranch(TerminateBB);
+
+    CGF.EmitBlock(CheckNextBB);
+  }
+  // Default case: call to outlined function through pointer if the target
+  // region makes a declare target call that may contain an orphaned parallel
+  // directive.
+  auto *ParallelFnTy =
+      llvm::FunctionType::get(CGM.VoidTy, {CGM.Int16Ty, CGM.Int32Ty},
+                              /*isVarArg=*/false);
+  llvm::Value *WorkFnCast =
+      Bld.CreateBitCast(WorkID, ParallelFnTy->getPointerTo());
+  // Insert call to work function via shared wrapper. The shared
+  // wrapper takes two arguments:
+  //   - the parallelism level;
+  //   - the thread ID;
+  emitCall(CGF, WST.Loc, {ParallelFnTy, WorkFnCast},
+           {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)});
+  // Go to end of parallel region.
+  CGF.EmitBranch(TerminateBB);
+
+  // Signal end of parallel region.
+  CGF.EmitBlock(TerminateBB);
+  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+                          CGM.getModule(), OMPRTL___kmpc_kernel_end_parallel),
+                      llvm::None);
+  CGF.EmitBranch(BarrierBB);
+
+  // All active and inactive workers wait at a barrier after parallel region.
+  CGF.EmitBlock(BarrierBB);
+  // Barrier after parallel region.
+  syncCTAThreads(CGF);
+  CGF.EmitBranch(AwaitBB);
+
+  // Exit target region.
+  CGF.EmitBlock(ExitBB);
+  // Skip initialization.
+  clearLocThreadIdInsertPt(CGF);
+}
+
 void CGOpenMPRuntimeGPU::createOffloadEntry(llvm::Constant *ID,
                                               llvm::Constant *Addr,
                                               uint64_t Size, int32_t,
@@ -1504,8 +1806,11 @@ void CGOpenMPRuntimeGPU::emitParallelCall(CodeGenFunction &CGF,
     CGBuilderTy &Bld = CGF.Builder;
     llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];
     llvm::Value *ID = llvm::ConstantPointerNull::get(CGM.Int8PtrTy);
-    if (WFn)
+    if (WFn) {
       ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy);
+      // Remember for post-processing in worker loop.
+      Work.emplace_back(WFn);
+    }
     llvm::Value *FnPtr = Bld.CreateBitOrPointerCast(OutlinedFn, CGM.Int8PtrTy);
 
     // Create a private scope that will globalize the arguments

diff  --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
index 464af1294b46..3decf48cbb93 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -38,7 +38,19 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
   llvm::SmallVector<llvm::Function *, 16> Work;
 
   struct EntryFunctionState {
+    llvm::BasicBlock *ExitBB = nullptr;
+  };
+
+  class WorkerFunctionState {
+  public:
+    llvm::Function *WorkerFn;
+    const CGFunctionInfo &CGFI;
     SourceLocation Loc;
+
+    WorkerFunctionState(CodeGenModule &CGM, SourceLocation Loc);
+
+  private:
+    void createWorkerFunction(CodeGenModule &CGM);
   };
 
   ExecutionMode getExecutionMode() const;
@@ -48,13 +60,20 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
   /// Get barrier to synchronize all threads in a block.
   void syncCTAThreads(CodeGenFunction &CGF);
 
-  /// Helper for target directive initialization.
-  void emitKernelInit(CodeGenFunction &CGF, EntryFunctionState &EST,
-                      bool IsSPMD);
+  /// Emit the worker function for the current target region.
+  void emitWorkerFunction(WorkerFunctionState &WST);
 
-  /// Helper for target directive finalization.
-  void emitKernelDeinit(CodeGenFunction &CGF, EntryFunctionState &EST,
-                        bool IsSPMD);
+  /// Helper for worker function. Emit body of worker loop.
+  void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST);
+
+  /// Helper for non-SPMD target entry function. Guide the master and
+  /// worker threads to their respective locations.
+  void emitNonSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
+                              WorkerFunctionState &WST);
+
+  /// Signal termination of OMP execution for non-SPMD target entry
+  /// function.
+  void emitNonSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
 
   /// Helper for generic variables globalization prolog.
   void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc,
@@ -63,6 +82,13 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
   /// Helper for generic variables globalization epilog.
   void emitGenericVarsEpilog(CodeGenFunction &CGF, bool WithSPMDCheck = false);
 
+  /// Helper for SPMD mode target directive's entry function.
+  void emitSPMDEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
+                           const OMPExecutableDirective &D);
+
+  /// Signal termination of SPMD mode execution.
+  void emitSPMDEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
+
   //
   // Base class overrides.
   //

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index d174cc8992dd..a05aa231eb51 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -111,10 +111,7 @@ inline std::string getAllAssumeClauseOptions() {
 /// Todo: Update kmp.h to include this file, and remove the enums in kmp.h
 ///       To complete this, more enum values will need to be moved here.
 enum class OMPScheduleType {
-  StaticChunked = 33,
   Static = 34, // static unspecialized
-  DistributeChunked = 91,
-  Distribute = 92,
   DynamicChunked = 35,
   GuidedChunked = 36, // guided unspecialized
   Runtime = 37,

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index a92c3ba381c6..0a249b3e2574 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -779,29 +779,6 @@ class OpenMPIRBuilder {
                                       llvm::ConstantInt *Size,
                                       const llvm::Twine &Name = Twine(""));
 
-  /// The `omp target` interface
-  ///
-  /// For more information about the usage of this interface,
-  /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
-  ///
-  ///{
-
-  /// Create a runtime call for kmpc_target_init
-  ///
-  /// \param Loc The insert and source location description.
-  /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
-  /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
-  InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime);
-
-  /// Create a runtime call for kmpc_target_deinit
-  ///
-  /// \param Loc The insert and source location description.
-  /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
-  /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
-  void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime);
-
-  ///}
-
   /// Declarations for LLVM-IR types (simple, array, function and structure) are
   /// generated below. Their names are defined and used in OpenMPKinds.def. Here
   /// we provide the declarations, the initializeTypes function will provide the

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 2003f44e34e9..1804cfeef7b8 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -409,8 +409,10 @@ __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr,
           /* Int */ Int32, /* kmp_task_t */ VoidPtr)
 
 /// OpenMP Device runtime functions
-__OMP_RTL(__kmpc_target_init, false, Int32, IdentPtr, Int1, Int1, Int1)
-__OMP_RTL(__kmpc_target_deinit, false, Void, IdentPtr, Int1, Int1)
+__OMP_RTL(__kmpc_kernel_init, false, Void, Int32, Int16)
+__OMP_RTL(__kmpc_kernel_deinit, false, Void, Int16)
+__OMP_RTL(__kmpc_spmd_kernel_init, false, Void, Int32, Int16)
+__OMP_RTL(__kmpc_spmd_kernel_deinit_v2, false, Void, Int16)
 __OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
 __OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
           VoidPtr, VoidPtr, VoidPtrPtr, SizeTy)

diff  --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 7d10464e59a3..c44d5d4b28ae 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -142,12 +142,6 @@ namespace AA {
 /// instruction/argument of \p Scope.
 bool isValidInScope(const Value &V, const Function *Scope);
 
-/// Return true if \p V is a valid value at position \p CtxI, that is a
-/// constant, an argument of the same function as \p CtxI, or an instruction in
-/// that function that dominates \p CtxI.
-bool isValidAtPosition(const Value &V, const Instruction &CtxI,
-                       InformationCache &InfoCache);
-
 /// Try to convert \p V to type \p Ty without introducing new instructions. If
 /// this is not possible return `nullptr`. Note: this function basically knows
 /// how to cast various constants.
@@ -1120,7 +1114,7 @@ struct Attributor {
       : Allocator(InfoCache.Allocator), Functions(Functions),
         InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
         DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
-        MaxFixpointIterations(None), OREGetter(None), PassName("") {}
+        MaxFixpointIterations(None), OREGetter(None), PassName("")  {}
 
   /// Constructor
   ///
@@ -1487,12 +1481,6 @@ struct Attributor {
                                          bool &UsedAssumedInformation) {
     return getAssumedSimplified(IRP, &AA, UsedAssumedInformation);
   }
-  Optional<Value *> getAssumedSimplified(const Value &V,
-                                         const AbstractAttribute &AA,
-                                         bool &UsedAssumedInformation) {
-    return getAssumedSimplified(IRPosition::value(V), AA,
-                                UsedAssumedInformation);
-  }
 
   /// Register \p CB as a simplification callback.
   /// `Attributor::getAssumedSimplified` will use these callbacks before
@@ -1519,17 +1507,10 @@ struct Attributor {
                                          bool &UsedAssumedInformation);
 
 public:
-  /// Translate \p V from the callee context into the call site context.
-  Optional<Value *>
-  translateArgumentToCallSiteContent(Optional<Value *> V, CallBase &CB,
-                                     const AbstractAttribute &AA,
-                                     bool &UsedAssumedInformation);
-
   /// Return true if \p AA (or its context instruction) is assumed dead.
   ///
   /// If \p LivenessAA is not provided it is queried.
   bool isAssumedDead(const AbstractAttribute &AA, const AAIsDead *LivenessAA,
-                     bool &UsedAssumedInformation,
                      bool CheckBBLivenessOnly = false,
                      DepClassTy DepClass = DepClassTy::OPTIONAL);
 
@@ -1537,7 +1518,7 @@ struct Attributor {
   ///
   /// If \p LivenessAA is not provided it is queried.
   bool isAssumedDead(const Instruction &I, const AbstractAttribute *QueryingAA,
-                     const AAIsDead *LivenessAA, bool &UsedAssumedInformation,
+                     const AAIsDead *LivenessAA,
                      bool CheckBBLivenessOnly = false,
                      DepClassTy DepClass = DepClassTy::OPTIONAL);
 
@@ -1545,7 +1526,7 @@ struct Attributor {
   ///
   /// If \p FnLivenessAA is not provided it is queried.
   bool isAssumedDead(const Use &U, const AbstractAttribute *QueryingAA,
-                     const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation,
+                     const AAIsDead *FnLivenessAA,
                      bool CheckBBLivenessOnly = false,
                      DepClassTy DepClass = DepClassTy::OPTIONAL);
 
@@ -1553,7 +1534,7 @@ struct Attributor {
   ///
   /// If \p FnLivenessAA is not provided it is queried.
   bool isAssumedDead(const IRPosition &IRP, const AbstractAttribute *QueryingAA,
-                     const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation,
+                     const AAIsDead *FnLivenessAA,
                      bool CheckBBLivenessOnly = false,
                      DepClassTy DepClass = DepClassTy::OPTIONAL);
 
@@ -1736,23 +1717,17 @@ struct Attributor {
   bool checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
                                const AbstractAttribute &QueryingAA,
                                const ArrayRef<unsigned> &Opcodes,
-                               bool &UsedAssumedInformation,
-                               bool CheckBBLivenessOnly = false,
-                               bool CheckPotentiallyDead = false);
+                               bool CheckBBLivenessOnly = false);
 
   /// Check \p Pred on all call-like instructions (=CallBased derived).
   ///
   /// See checkForAllCallLikeInstructions(...) for more information.
   bool checkForAllCallLikeInstructions(function_ref<bool(Instruction &)> Pred,
-                                       const AbstractAttribute &QueryingAA,
-                                       bool &UsedAssumedInformation,
-                                       bool CheckBBLivenessOnly = false,
-                                       bool CheckPotentiallyDead = false) {
-    return checkForAllInstructions(
-        Pred, QueryingAA,
-        {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
-         (unsigned)Instruction::Call},
-        UsedAssumedInformation, CheckBBLivenessOnly, CheckPotentiallyDead);
+                                       const AbstractAttribute &QueryingAA) {
+    return checkForAllInstructions(Pred, QueryingAA,
+                                   {(unsigned)Instruction::Invoke,
+                                    (unsigned)Instruction::CallBr,
+                                    (unsigned)Instruction::Call});
   }
 
   /// Check \p Pred on all Read/Write instructions.
@@ -1761,8 +1736,7 @@ struct Attributor {
   /// to memory present in the information cache and return true if \p Pred
   /// holds on all of them.
   bool checkForAllReadWriteInstructions(function_ref<bool(Instruction &)> Pred,
-                                        AbstractAttribute &QueryingAA,
-                                        bool &UsedAssumedInformation);
+                                        AbstractAttribute &QueryingAA);
 
   /// Create a shallow wrapper for \p F such that \p F has internal linkage
   /// afterwards. It also sets the original \p F 's name to anonymous
@@ -2680,6 +2654,7 @@ struct AAReturnedValues
   virtual llvm::iterator_range<const_iterator> returned_values() const = 0;
 
   virtual size_t getNumReturnValues() const = 0;
+  virtual const SmallSetVector<CallBase *, 4> &getUnresolvedCalls() const = 0;
 
   /// Create an abstract attribute view for the position \p IRP.
   static AAReturnedValues &createForPosition(const IRPosition &IRP,
@@ -3507,6 +3482,9 @@ struct AAHeapToStack : public StateWrapper<BooleanState, AbstractAttribute> {
   /// Returns true if HeapToStack conversion is assumed to be possible.
   virtual bool isAssumedHeapToStack(CallBase &CB) const = 0;
 
+  /// Returns true if HeapToStack conversion is known to be possible.
+  virtual bool isKnownHeapToStack(CallBase &CB) const = 0;
+
   /// Create an abstract attribute view for the position \p IRP.
   static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A);
 

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 60d71805c758..1020de5f30ee 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -20,7 +20,6 @@
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Value.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -2192,70 +2191,6 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
   return Builder.CreateCall(Fn, Args);
 }
 
-OpenMPIRBuilder::InsertPointTy
-OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime) {
-  if (!updateToLocation(Loc))
-    return Loc.IP;
-
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
-  Value *Ident = getOrCreateIdent(SrcLocStr);
-  ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
-  ConstantInt *UseGenericStateMachine =
-      ConstantInt::getBool(Int32->getContext(), !IsSPMD);
-  ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
-
-  Function *Fn = getOrCreateRuntimeFunctionPtr(
-      omp::RuntimeFunction::OMPRTL___kmpc_target_init);
-
-  CallInst *ThreadKind =
-      Builder.CreateCall(Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
-
-  Value *ExecUserCode = Builder.CreateICmpEQ(
-      ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), "exec_user_code");
-
-  // ThreadKind = __kmpc_target_init(...)
-  // if (ThreadKind == -1)
-  //   user_code
-  // else
-  //   return;
-
-  auto *UI = Builder.CreateUnreachable();
-  BasicBlock *CheckBB = UI->getParent();
-  BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry");
-
-  BasicBlock *WorkerExitBB = BasicBlock::Create(
-      CheckBB->getContext(), "worker.exit", CheckBB->getParent());
-  Builder.SetInsertPoint(WorkerExitBB);
-  Builder.CreateRetVoid();
-
-  auto *CheckBBTI = CheckBB->getTerminator();
-  Builder.SetInsertPoint(CheckBBTI);
-  Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
-
-  CheckBBTI->eraseFromParent();
-  UI->eraseFromParent();
-
-  // Continue in the "user_code" block, see diagram above and in
-  // openmp/libomptarget/deviceRTLs/common/include/target.h .
-  return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
-}
-
-void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
-                                         bool IsSPMD, bool RequiresFullRuntime) {
-  if (!updateToLocation(Loc))
-    return;
-
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
-  Value *Ident = getOrCreateIdent(SrcLocStr);
-  ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
-  ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);
-
-  Function *Fn = getOrCreateRuntimeFunctionPtr(
-      omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
-
-  Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal});
-}
-
 std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
                                                    StringRef FirstSeparator,
                                                    StringRef Separator) {

diff  --git a/llvm/lib/IR/Assumptions.cpp b/llvm/lib/IR/Assumptions.cpp
index 6498114cd60d..1bd8b7f51e67 100644
--- a/llvm/lib/IR/Assumptions.cpp
+++ b/llvm/lib/IR/Assumptions.cpp
@@ -33,5 +33,4 @@ StringSet<> llvm::KnownAssumptionStrings({
     "omp_no_openmp",          // OpenMP 5.1
     "omp_no_openmp_routines", // OpenMP 5.1
     "omp_no_parallelism",     // OpenMP 5.1
-    "ompx_spmd_amenable",     // OpenMPOpt extension
 });

diff  --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 668580cf7213..af681ca76f16 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -26,7 +26,6 @@
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Attributes.h"
-#include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/IRBuilder.h"
@@ -176,22 +175,6 @@ bool AA::isValidInScope(const Value &V, const Function *Scope) {
   return false;
 }
 
-bool AA::isValidAtPosition(const Value &V, const Instruction &CtxI,
-                           InformationCache &InfoCache) {
-  if (isa<Constant>(V))
-    return true;
-  const Function *Scope = CtxI.getFunction();
-  if (auto *A = dyn_cast<Argument>(&V))
-    return A->getParent() == Scope;
-  if (auto *I = dyn_cast<Instruction>(&V))
-    if (I->getFunction() == Scope) {
-      const DominatorTree *DT =
-          InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*Scope);
-      return DT && DT->dominates(I, &CtxI);
-    }
-  return false;
-}
-
 Value *AA::getWithType(Value &V, Type &Ty) {
   if (V.getType() == &Ty)
     return &V;
@@ -626,20 +609,8 @@ void IRPosition::verify() {
 Optional<Constant *>
 Attributor::getAssumedConstant(const Value &V, const AbstractAttribute &AA,
                                bool &UsedAssumedInformation) {
-  // First check all callbacks provided by outside AAs. If any of them returns
-  // a non-null value that is 
diff erent from the associated value, or None, we
-  // assume it's simpliied.
-  IRPosition IRP = IRPosition::value(V, AA.getCallBaseContext());
-  for (auto &CB : SimplificationCallbacks[IRP]) {
-    Optional<Value *> SimplifiedV = CB(IRP, &AA, UsedAssumedInformation);
-    if (!SimplifiedV.hasValue())
-      return llvm::None;
-    if (*SimplifiedV && *SimplifiedV != &IRP.getAssociatedValue() &&
-        isa<Constant>(*SimplifiedV))
-      return cast<Constant>(*SimplifiedV);
-  }
-  const auto &ValueSimplifyAA =
-      getAAFor<AAValueSimplify>(AA, IRP, DepClassTy::NONE);
+  const auto &ValueSimplifyAA = getAAFor<AAValueSimplify>(
+      AA, IRPosition::value(V, AA.getCallBaseContext()), DepClassTy::NONE);
   Optional<Value *> SimplifiedV =
       ValueSimplifyAA.getAssumedSimplifiedValue(*this);
   bool IsKnown = ValueSimplifyAA.isAtFixpoint();
@@ -699,21 +670,6 @@ Attributor::getAssumedSimplified(const IRPosition &IRP,
   return const_cast<Value *>(&IRP.getAssociatedValue());
 }
 
-Optional<Value *> Attributor::translateArgumentToCallSiteContent(
-    Optional<Value *> V, CallBase &CB, const AbstractAttribute &AA,
-    bool &UsedAssumedInformation) {
-  if (!V.hasValue())
-    return V;
-  if (*V == nullptr || isa<Constant>(*V))
-    return V;
-  if (auto *Arg = dyn_cast<Argument>(*V))
-    if (!Arg->hasPointeeInMemoryValueAttr())
-      return getAssumedSimplified(
-          IRPosition::callsite_argument(CB, Arg->getArgNo()), AA,
-          UsedAssumedInformation);
-  return nullptr;
-}
-
 Attributor::~Attributor() {
   // The abstract attributes are allocated via the BumpPtrAllocator Allocator,
   // thus we cannot delete them. We can, and want to, destruct them though.
@@ -725,24 +681,21 @@ Attributor::~Attributor() {
 
 bool Attributor::isAssumedDead(const AbstractAttribute &AA,
                                const AAIsDead *FnLivenessAA,
-                               bool &UsedAssumedInformation,
                                bool CheckBBLivenessOnly, DepClassTy DepClass) {
   const IRPosition &IRP = AA.getIRPosition();
   if (!Functions.count(IRP.getAnchorScope()))
     return false;
-  return isAssumedDead(IRP, &AA, FnLivenessAA, UsedAssumedInformation,
-                       CheckBBLivenessOnly, DepClass);
+  return isAssumedDead(IRP, &AA, FnLivenessAA, CheckBBLivenessOnly, DepClass);
 }
 
 bool Attributor::isAssumedDead(const Use &U,
                                const AbstractAttribute *QueryingAA,
                                const AAIsDead *FnLivenessAA,
-                               bool &UsedAssumedInformation,
                                bool CheckBBLivenessOnly, DepClassTy DepClass) {
   Instruction *UserI = dyn_cast<Instruction>(U.getUser());
   if (!UserI)
     return isAssumedDead(IRPosition::value(*U.get()), QueryingAA, FnLivenessAA,
-                         UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
+                         CheckBBLivenessOnly, DepClass);
 
   if (auto *CB = dyn_cast<CallBase>(UserI)) {
     // For call site argument uses we can check if the argument is
@@ -751,27 +704,25 @@ bool Attributor::isAssumedDead(const Use &U,
       const IRPosition &CSArgPos =
           IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U));
       return isAssumedDead(CSArgPos, QueryingAA, FnLivenessAA,
-                           UsedAssumedInformation, CheckBBLivenessOnly,
-                           DepClass);
+                           CheckBBLivenessOnly, DepClass);
     }
   } else if (ReturnInst *RI = dyn_cast<ReturnInst>(UserI)) {
     const IRPosition &RetPos = IRPosition::returned(*RI->getFunction());
-    return isAssumedDead(RetPos, QueryingAA, FnLivenessAA,
-                         UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
+    return isAssumedDead(RetPos, QueryingAA, FnLivenessAA, CheckBBLivenessOnly,
+                         DepClass);
   } else if (PHINode *PHI = dyn_cast<PHINode>(UserI)) {
     BasicBlock *IncomingBB = PHI->getIncomingBlock(U);
     return isAssumedDead(*IncomingBB->getTerminator(), QueryingAA, FnLivenessAA,
-                         UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
+                         CheckBBLivenessOnly, DepClass);
   }
 
   return isAssumedDead(IRPosition::value(*UserI), QueryingAA, FnLivenessAA,
-                       UsedAssumedInformation, CheckBBLivenessOnly, DepClass);
+                       CheckBBLivenessOnly, DepClass);
 }
 
 bool Attributor::isAssumedDead(const Instruction &I,
                                const AbstractAttribute *QueryingAA,
                                const AAIsDead *FnLivenessAA,
-                               bool &UsedAssumedInformation,
                                bool CheckBBLivenessOnly, DepClassTy DepClass) {
   const IRPosition::CallBaseContext *CBCtx =
       QueryingAA ? QueryingAA->getCallBaseContext() : nullptr;
@@ -787,8 +738,6 @@ bool Attributor::isAssumedDead(const Instruction &I,
       FnLivenessAA->isAssumedDead(&I)) {
     if (QueryingAA)
       recordDependence(*FnLivenessAA, *QueryingAA, DepClass);
-    if (!FnLivenessAA->isKnownDead(&I))
-      UsedAssumedInformation = true;
     return true;
   }
 
@@ -804,8 +753,6 @@ bool Attributor::isAssumedDead(const Instruction &I,
   if (IsDeadAA.isAssumedDead()) {
     if (QueryingAA)
       recordDependence(IsDeadAA, *QueryingAA, DepClass);
-    if (!IsDeadAA.isKnownDead())
-      UsedAssumedInformation = true;
     return true;
   }
 
@@ -815,11 +762,10 @@ bool Attributor::isAssumedDead(const Instruction &I,
 bool Attributor::isAssumedDead(const IRPosition &IRP,
                                const AbstractAttribute *QueryingAA,
                                const AAIsDead *FnLivenessAA,
-                               bool &UsedAssumedInformation,
                                bool CheckBBLivenessOnly, DepClassTy DepClass) {
   Instruction *CtxI = IRP.getCtxI();
   if (CtxI &&
-      isAssumedDead(*CtxI, QueryingAA, FnLivenessAA, UsedAssumedInformation,
+      isAssumedDead(*CtxI, QueryingAA, FnLivenessAA,
                     /* CheckBBLivenessOnly */ true,
                     CheckBBLivenessOnly ? DepClass : DepClassTy::OPTIONAL))
     return true;
@@ -842,8 +788,6 @@ bool Attributor::isAssumedDead(const IRPosition &IRP,
   if (IsDeadAA->isAssumedDead()) {
     if (QueryingAA)
       recordDependence(*IsDeadAA, *QueryingAA, DepClass);
-    if (!IsDeadAA->isKnownDead())
-      UsedAssumedInformation = true;
     return true;
   }
 
@@ -858,6 +802,19 @@ bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
   if (V.use_empty())
     return true;
 
+  // If the value is replaced by another one, for now a constant, we do not have
+  // uses. Note that this requires users of `checkForAllUses` to not recurse but
+  // instead use the `follow` callback argument to look at transitive users,
+  // however, that should be clear from the presence of the argument.
+  bool UsedAssumedInformation = false;
+  Optional<Constant *> C =
+      getAssumedConstant(V, QueryingAA, UsedAssumedInformation);
+  if (C.hasValue() && C.getValue()) {
+    LLVM_DEBUG(dbgs() << "[Attributor] Value is simplified, uses skipped: " << V
+                      << " -> " << *C.getValue() << "\n");
+    return true;
+  }
+
   const IRPosition &IRP = QueryingAA.getIRPosition();
   SmallVector<const Use *, 16> Worklist;
   SmallPtrSet<const Use *, 16> Visited;
@@ -880,8 +837,7 @@ bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
       continue;
     LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << **U << " in "
                       << *U->getUser() << "\n");
-    bool UsedAssumedInformation = false;
-    if (isAssumedDead(*U, &QueryingAA, LivenessAA, UsedAssumedInformation,
+    if (isAssumedDead(*U, &QueryingAA, LivenessAA,
                       /* CheckBBLivenessOnly */ false, LivenessDepClass)) {
       LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n");
       continue;
@@ -945,9 +901,7 @@ bool Attributor::checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
     const Use &U = *Uses[u];
     LLVM_DEBUG(dbgs() << "[Attributor] Check use: " << *U << " in "
                       << *U.getUser() << "\n");
-    bool UsedAssumedInformation = false;
-    if (isAssumedDead(U, QueryingAA, nullptr, UsedAssumedInformation,
-                      /* CheckBBLivenessOnly */ true)) {
+    if (isAssumedDead(U, QueryingAA, nullptr, /* CheckBBLivenessOnly */ true)) {
       LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n");
       continue;
     }
@@ -1066,8 +1020,7 @@ static bool checkForAllInstructionsImpl(
     Attributor *A, InformationCache::OpcodeInstMapTy &OpcodeInstMap,
     function_ref<bool(Instruction &)> Pred, const AbstractAttribute *QueryingAA,
     const AAIsDead *LivenessAA, const ArrayRef<unsigned> &Opcodes,
-    bool &UsedAssumedInformation, bool CheckBBLivenessOnly = false,
-    bool CheckPotentiallyDead = false) {
+    bool CheckBBLivenessOnly = false) {
   for (unsigned Opcode : Opcodes) {
     // Check if we have instructions with this opcode at all first.
     auto *Insts = OpcodeInstMap.lookup(Opcode);
@@ -1076,9 +1029,8 @@ static bool checkForAllInstructionsImpl(
 
     for (Instruction *I : *Insts) {
       // Skip dead instructions.
-      if (A && !CheckPotentiallyDead &&
-          A->isAssumedDead(IRPosition::value(*I), QueryingAA, LivenessAA,
-                           UsedAssumedInformation, CheckBBLivenessOnly))
+      if (A && A->isAssumedDead(IRPosition::value(*I), QueryingAA, LivenessAA,
+                                CheckBBLivenessOnly))
         continue;
 
       if (!Pred(*I))
@@ -1091,9 +1043,7 @@ static bool checkForAllInstructionsImpl(
 bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
                                          const AbstractAttribute &QueryingAA,
                                          const ArrayRef<unsigned> &Opcodes,
-                                         bool &UsedAssumedInformation,
-                                         bool CheckBBLivenessOnly,
-                                         bool CheckPotentiallyDead) {
+                                         bool CheckBBLivenessOnly) {
 
   const IRPosition &IRP = QueryingAA.getIRPosition();
   // Since we need to provide instructions we have to have an exact definition.
@@ -1104,23 +1054,21 @@ bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
   // TODO: use the function scope once we have call site AAReturnedValues.
   const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
   const auto *LivenessAA =
-      (CheckBBLivenessOnly || CheckPotentiallyDead)
+      CheckBBLivenessOnly
           ? nullptr
           : &(getAAFor<AAIsDead>(QueryingAA, QueryIRP, DepClassTy::NONE));
 
   auto &OpcodeInstMap =
       InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction);
   if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA,
-                                   LivenessAA, Opcodes, UsedAssumedInformation,
-                                   CheckBBLivenessOnly, CheckPotentiallyDead))
+                                   LivenessAA, Opcodes, CheckBBLivenessOnly))
     return false;
 
   return true;
 }
 
 bool Attributor::checkForAllReadWriteInstructions(
-    function_ref<bool(Instruction &)> Pred, AbstractAttribute &QueryingAA,
-    bool &UsedAssumedInformation) {
+    function_ref<bool(Instruction &)> Pred, AbstractAttribute &QueryingAA) {
 
   const Function *AssociatedFunction =
       QueryingAA.getIRPosition().getAssociatedFunction();
@@ -1135,8 +1083,7 @@ bool Attributor::checkForAllReadWriteInstructions(
   for (Instruction *I :
        InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) {
     // Skip dead instructions.
-    if (isAssumedDead(IRPosition::value(*I), &QueryingAA, &LivenessAA,
-                      UsedAssumedInformation))
+    if (isAssumedDead(IRPosition::value(*I), &QueryingAA, &LivenessAA))
       continue;
 
     if (!Pred(*I))
@@ -1162,6 +1109,7 @@ void Attributor::runTillFixpoint() {
   else
     MaxFixedPointIterations = SetFixpointIterations;
 
+
   SmallVector<AbstractAttribute *, 32> ChangedAAs;
   SetVector<AbstractAttribute *> Worklist, InvalidAAs;
   Worklist.insert(DG.SyntheticRoot.begin(), DG.SyntheticRoot.end());
@@ -1314,9 +1262,7 @@ ChangeStatus Attributor::manifestAttributes() {
       continue;
 
     // Skip dead code.
-    bool UsedAssumedInformation = false;
-    if (isAssumedDead(*AA, nullptr, UsedAssumedInformation,
-                      /* CheckBBLivenessOnly */ true))
+    if (isAssumedDead(*AA, nullptr, /* CheckBBLivenessOnly */ true))
       continue;
     // Check if the manifest debug counter that allows skipping manifestation of
     // AAs
@@ -1432,17 +1378,11 @@ ChangeStatus Attributor::cleanupIR() {
 
     // Do not replace uses in returns if the value is a must-tail call we will
     // not delete.
-    if (auto *RI = dyn_cast<ReturnInst>(U->getUser())) {
+    if (isa<ReturnInst>(U->getUser()))
       if (auto *CI = dyn_cast<CallInst>(OldV->stripPointerCasts()))
         if (CI->isMustTailCall() &&
             (!ToBeDeletedInsts.count(CI) || !isRunOn(*CI->getCaller())))
           return;
-      // If we rewrite a return and the new value is not an argument, strip the
-      // `returned` attribute as it is wrong now.
-      if (!isa<Argument>(NewV))
-        for (auto &Arg : RI->getFunction()->args())
-          Arg.removeAttr(Attribute::Returned);
-    }
 
     // Do not perform call graph altering changes outside the SCC.
     if (auto *CB = dyn_cast<CallBase>(U->getUser()))
@@ -1680,9 +1620,7 @@ ChangeStatus Attributor::updateAA(AbstractAttribute &AA) {
 
   auto &AAState = AA.getState();
   ChangeStatus CS = ChangeStatus::UNCHANGED;
-  bool UsedAssumedInformation = false;
-  if (!isAssumedDead(AA, nullptr, UsedAssumedInformation,
-                     /* CheckBBLivenessOnly */ true))
+  if (!isAssumedDead(AA, nullptr, /* CheckBBLivenessOnly */ true))
     CS = AA.update(*this);
 
   if (DV.empty()) {
@@ -1847,11 +1785,9 @@ bool Attributor::isValidFunctionSignatureRewrite(
 
   // Forbid must-tail calls for now.
   // TODO:
-  bool UsedAssumedInformation = false;
   auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn);
   if (!checkForAllInstructionsImpl(nullptr, OpcodeInstMap, InstPred, nullptr,
-                                   nullptr, {Instruction::Call},
-                                   UsedAssumedInformation)) {
+                                   nullptr, {Instruction::Call})) {
     LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite due to instructions\n");
     return false;
   }
@@ -2369,7 +2305,10 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
     if (!Callee->getReturnType()->isVoidTy() && !CB.use_empty()) {
 
       IRPosition CBRetPos = IRPosition::callsite_returned(CB);
-      getOrCreateAAFor<AAValueSimplify>(CBRetPos);
+
+      // Call site return integer values might be limited by a constant range.
+      if (Callee->getReturnType()->isIntegerTy())
+        getOrCreateAAFor<AAValueConstantRange>(CBRetPos);
     }
 
     for (int I = 0, E = CB.getNumArgOperands(); I < E; ++I) {
@@ -2418,12 +2357,10 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
 
   auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(F);
   bool Success;
-  bool UsedAssumedInformation = false;
   Success = checkForAllInstructionsImpl(
       nullptr, OpcodeInstMap, CallSitePred, nullptr, nullptr,
       {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
-       (unsigned)Instruction::Call},
-      UsedAssumedInformation);
+       (unsigned)Instruction::Call});
   (void)Success;
   assert(Success && "Expected the check call to be successful!");
 
@@ -2438,8 +2375,7 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
   };
   Success = checkForAllInstructionsImpl(
       nullptr, OpcodeInstMap, LoadStorePred, nullptr, nullptr,
-      {(unsigned)Instruction::Load, (unsigned)Instruction::Store},
-      UsedAssumedInformation);
+      {(unsigned)Instruction::Load, (unsigned)Instruction::Store});
   (void)Success;
   assert(Success && "Expected the check call to be successful!");
 }

diff  --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index b9f809bcc3ec..26ed42ea247c 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -11,10 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/Constants.h"
 #include "llvm/Transforms/IPO/Attributor.h"
 
-#include "llvm/ADT/APInt.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
@@ -29,12 +27,9 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/NoFolder.h"
-#include "llvm/Support/Alignment.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO/ArgumentPromotion.h"
@@ -248,10 +243,9 @@ static Value *constructPointer(Type *ResTy, Type *PtrElemTy, Value *Ptr,
 /// once. Note that the value used for the callback may still be the value
 /// associated with \p IRP (due to PHIs). To limit how much effort is invested,
 /// we will never visit more values than specified by \p MaxValues.
-template <typename StateTy>
+template <typename AAType, typename StateTy>
 static bool genericValueTraversal(
-    Attributor &A, IRPosition IRP, const AbstractAttribute &QueryingAA,
-    StateTy &State,
+    Attributor &A, IRPosition IRP, const AAType &QueryingAA, StateTy &State,
     function_ref<bool(Value &, const Instruction *, StateTy &, bool)>
         VisitValueCB,
     const Instruction *CtxI, bool UseValueSimplify = true, int MaxValues = 16,
@@ -265,11 +259,10 @@ static bool genericValueTraversal(
         DepClassTy::NONE);
   bool AnyDead = false;
 
-  Value *InitialV = &IRP.getAssociatedValue();
   using Item = std::pair<Value *, const Instruction *>;
   SmallSet<Item, 16> Visited;
   SmallVector<Item, 16> Worklist;
-  Worklist.push_back({InitialV, CtxI});
+  Worklist.push_back({&IRP.getAssociatedValue(), CtxI});
 
   int Iteration = 0;
   do {
@@ -308,22 +301,8 @@ static bool genericValueTraversal(
       continue;
     }
 
-    // Look through select instructions, visit assumed potential values.
+    // Look through select instructions, visit both potential values.
     if (auto *SI = dyn_cast<SelectInst>(V)) {
-      bool UsedAssumedInformation = false;
-      Optional<Constant *> C = A.getAssumedConstant(
-          *SI->getCondition(), QueryingAA, UsedAssumedInformation);
-      bool NoValueYet = !C.hasValue();
-      if (NoValueYet || isa_and_nonnull<UndefValue>(*C))
-        continue;
-      if (auto *CI = dyn_cast_or_null<ConstantInt>(*C)) {
-        if (CI->isZero())
-          Worklist.push_back({SI->getFalseValue(), CtxI});
-        else
-          Worklist.push_back({SI->getTrueValue(), CtxI});
-        continue;
-      }
-      // We could not simplify the condition, assume both values.(
       Worklist.push_back({SI->getTrueValue(), CtxI});
       Worklist.push_back({SI->getFalseValue(), CtxI});
       continue;
@@ -335,9 +314,8 @@ static bool genericValueTraversal(
              "Expected liveness in the presence of instructions!");
       for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
         BasicBlock *IncomingBB = PHI->getIncomingBlock(u);
-        bool UsedAssumedInformation = false;
         if (A.isAssumedDead(*IncomingBB->getTerminator(), &QueryingAA,
-                            LivenessAA, UsedAssumedInformation,
+                            LivenessAA,
                             /* CheckBBLivenessOnly */ true)) {
           AnyDead = true;
           continue;
@@ -373,26 +351,6 @@ static bool genericValueTraversal(
   return true;
 }
 
-static bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
-                                        SmallVectorImpl<Value *> &Objects,
-                                        const AbstractAttribute &QueryingAA,
-                                        const Instruction *CtxI) {
-  auto StripCB = [&](Value *V) { return getUnderlyingObject(V); };
-  SmallPtrSet<Value *, 8> SeenObjects;
-  auto VisitValueCB = [&SeenObjects](Value &Val, const Instruction *,
-                                     SmallVectorImpl<Value *> &Objects,
-                                     bool) -> bool {
-    if (SeenObjects.insert(&Val).second)
-      Objects.push_back(&Val);
-    return true;
-  };
-  if (!genericValueTraversal<decltype(Objects)>(
-          A, IRPosition::value(Ptr), QueryingAA, Objects, VisitValueCB, CtxI,
-          true, 32, StripCB))
-    return false;
-  return true;
-}
-
 const Value *stripAndAccumulateMinimalOffsets(
     Attributor &A, const AbstractAttribute &QueryingAA, const Value *Val,
     const DataLayout &DL, APInt &Offset, bool AllowNonInbounds,
@@ -795,9 +753,7 @@ struct AANoUnwindImpl : AANoUnwind {
       return false;
     };
 
-    bool UsedAssumedInformation = false;
-    if (!A.checkForAllInstructions(CheckForNoUnwind, *this, Opcodes,
-                                   UsedAssumedInformation))
+    if (!A.checkForAllInstructions(CheckForNoUnwind, *this, Opcodes))
       return indicatePessimisticFixpoint();
 
     return ChangeStatus::UNCHANGED;
@@ -854,6 +810,13 @@ class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState {
   /// return instructions that might return them.
   MapVector<Value *, SmallSetVector<ReturnInst *, 4>> ReturnedValues;
 
+  /// Mapping to remember the number of returned values for a call site such
+  /// that we can avoid updates if nothing changed.
+  DenseMap<const CallBase *, unsigned> NumReturnedValuesPerKnownAA;
+
+  /// Set of unresolved calls returned by the associated function.
+  SmallSetVector<CallBase *, 4> UnresolvedCalls;
+
   /// State flags
   ///
   ///{
@@ -920,6 +883,10 @@ class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState {
     return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end());
   }
 
+  const SmallSetVector<CallBase *, 4> &getUnresolvedCalls() const override {
+    return UnresolvedCalls;
+  }
+
   /// Return the number of potential return values, -1 if unknown.
   size_t getNumReturnValues() const override {
     return isValidState() ? ReturnedValues.size() : -1;
@@ -974,6 +941,16 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
   // Bookkeeping.
   STATS_DECLTRACK(UniqueReturnValue, FunctionReturn,
                   "Number of function with unique return");
+
+  // Callback to replace the uses of CB with the constant C.
+  auto ReplaceCallSiteUsersWith = [&A](CallBase &CB, Constant &C) {
+    if (CB.use_empty())
+      return ChangeStatus::UNCHANGED;
+    if (A.changeValueAfterManifest(CB, C))
+      return ChangeStatus::CHANGED;
+    return ChangeStatus::UNCHANGED;
+  };
+
   // If the assumed unique return value is an argument, annotate it.
   if (auto *UniqueRVArg = dyn_cast<Argument>(UniqueRV.getValue())) {
     if (UniqueRVArg->getType()->canLosslesslyBitCastTo(
@@ -981,13 +958,40 @@ ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
       getIRPosition() = IRPosition::argument(*UniqueRVArg);
       Changed = IRAttribute::manifest(A);
     }
+  } else if (auto *RVC = dyn_cast<Constant>(UniqueRV.getValue())) {
+    // We can replace the returned value with the unique returned constant.
+    Value &AnchorValue = getAnchorValue();
+    if (Function *F = dyn_cast<Function>(&AnchorValue)) {
+      for (const Use &U : F->uses())
+        if (CallBase *CB = dyn_cast<CallBase>(U.getUser()))
+          if (CB->isCallee(&U)) {
+            Constant *RVCCast =
+                CB->getType() == RVC->getType()
+                    ? RVC
+                    : ConstantExpr::getPointerCast(RVC, CB->getType());
+            Changed = ReplaceCallSiteUsersWith(*CB, *RVCCast) | Changed;
+          }
+    } else {
+      assert(isa<CallBase>(AnchorValue) &&
+             "Expcected a function or call base anchor!");
+      Constant *RVCCast =
+          AnchorValue.getType() == RVC->getType()
+              ? RVC
+              : ConstantExpr::getPointerCast(RVC, AnchorValue.getType());
+      Changed = ReplaceCallSiteUsersWith(cast<CallBase>(AnchorValue), *RVCCast);
+    }
+    if (Changed == ChangeStatus::CHANGED)
+      STATS_DECLTRACK(UniqueConstantReturnValue, FunctionReturn,
+                      "Number of function returns replaced by constant return");
   }
+
   return Changed;
 }
 
 const std::string AAReturnedValuesImpl::getAsStr() const {
   return (isAtFixpoint() ? "returns(#" : "may-return(#") +
-         (isValidState() ? std::to_string(getNumReturnValues()) : "?") + ")";
+         (isValidState() ? std::to_string(getNumReturnValues()) : "?") +
+         ")[#UC: " + std::to_string(UnresolvedCalls.size()) + "]";
 }
 
 Optional<Value *>
@@ -1021,6 +1025,11 @@ bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts(
   // encountered an overdefined one during an update.
   for (auto &It : ReturnedValues) {
     Value *RV = It.first;
+
+    CallBase *CB = dyn_cast<CallBase>(RV);
+    if (CB && !UnresolvedCalls.count(CB))
+      continue;
+
     if (!Pred(*RV, It.second))
       return false;
   }
@@ -1029,37 +1038,180 @@ bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts(
 }
 
 ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
-  ChangeStatus Changed = ChangeStatus::UNCHANGED;
+  size_t NumUnresolvedCalls = UnresolvedCalls.size();
+  bool Changed = false;
+
+  // State used in the value traversals starting in returned values.
+  struct RVState {
+    // The map in which we collect return values -> return instrs.
+    decltype(ReturnedValues) &RetValsMap;
+    // The flag to indicate a change.
+    bool &Changed;
+    // The return instrs we come from.
+    SmallSetVector<ReturnInst *, 4> RetInsts;
+  };
 
-  auto ReturnValueCB = [&](Value &V, const Instruction *CtxI, ReturnInst &Ret,
-                           bool) -> bool {
-    bool UsedAssumedInformation = false;
-    Optional<Value *> SimpleRetVal =
-        A.getAssumedSimplified(V, *this, UsedAssumedInformation);
-    if (!SimpleRetVal.hasValue())
-      return true;
-    Value *RetVal = *SimpleRetVal ? *SimpleRetVal : &V;
-    assert(AA::isValidInScope(*RetVal, Ret.getFunction()) &&
-           "Assumed returned value should be valid in function scope!");
-    if (ReturnedValues[RetVal].insert(&Ret))
-      Changed = ChangeStatus::CHANGED;
+  // Callback for a leaf value returned by the associated function.
+  auto VisitValueCB = [](Value &Val, const Instruction *, RVState &RVS,
+                         bool) -> bool {
+    auto Size = RVS.RetValsMap[&Val].size();
+    RVS.RetValsMap[&Val].insert(RVS.RetInsts.begin(), RVS.RetInsts.end());
+    bool Inserted = RVS.RetValsMap[&Val].size() != Size;
+    RVS.Changed |= Inserted;
+    LLVM_DEBUG({
+      if (Inserted)
+        dbgs() << "[AAReturnedValues] 1 Add new returned value " << Val
+               << " => " << RVS.RetInsts.size() << "\n";
+    });
     return true;
   };
 
-  auto ReturnInstCB = [&](Instruction &I) {
+  // Helper method to invoke the generic value traversal.
+  auto VisitReturnedValue = [&](Value &RV, RVState &RVS,
+                                const Instruction *CtxI) {
+    IRPosition RetValPos = IRPosition::value(RV, getCallBaseContext());
+    return genericValueTraversal<AAReturnedValues, RVState>(
+        A, RetValPos, *this, RVS, VisitValueCB, CtxI,
+        /* UseValueSimplify */ false);
+  };
+
+  // Callback for all "return intructions" live in the associated function.
+  auto CheckReturnInst = [this, &VisitReturnedValue, &Changed](Instruction &I) {
     ReturnInst &Ret = cast<ReturnInst>(I);
-    return genericValueTraversal<ReturnInst>(
-        A, IRPosition::value(*Ret.getReturnValue()), *this, Ret, ReturnValueCB,
-        &I);
+    RVState RVS({ReturnedValues, Changed, {}});
+    RVS.RetInsts.insert(&Ret);
+    return VisitReturnedValue(*Ret.getReturnValue(), RVS, &I);
   };
 
-  // Discover returned values from all live returned instructions in the
-  // associated function.
-  bool UsedAssumedInformation = false;
-  if (!A.checkForAllInstructions(ReturnInstCB, *this, {Instruction::Ret},
-                                 UsedAssumedInformation))
+  // Start by discovering returned values from all live returned instructions in
+  // the associated function.
+  if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret}))
     return indicatePessimisticFixpoint();
-  return Changed;
+
+  // Once returned values "directly" present in the code are handled we try to
+  // resolve returned calls. To avoid modifications to the ReturnedValues map
+  // while we iterate over it we kept record of potential new entries in a copy
+  // map, NewRVsMap.
+  decltype(ReturnedValues) NewRVsMap;
+
+  auto HandleReturnValue = [&](Value *RV,
+                               SmallSetVector<ReturnInst *, 4> &RIs) {
+    LLVM_DEBUG(dbgs() << "[AAReturnedValues] Returned value: " << *RV << " by #"
+                      << RIs.size() << " RIs\n");
+    CallBase *CB = dyn_cast<CallBase>(RV);
+    if (!CB || UnresolvedCalls.count(CB))
+      return;
+
+    if (!CB->getCalledFunction()) {
+      LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB
+                        << "\n");
+      UnresolvedCalls.insert(CB);
+      return;
+    }
+
+    // TODO: use the function scope once we have call site AAReturnedValues.
+    const auto &RetValAA = A.getAAFor<AAReturnedValues>(
+        *this, IRPosition::function(*CB->getCalledFunction()),
+        DepClassTy::REQUIRED);
+    LLVM_DEBUG(dbgs() << "[AAReturnedValues] Found another AAReturnedValues: "
+                      << RetValAA << "\n");
+
+    // Skip dead ends, thus if we do not know anything about the returned
+    // call we mark it as unresolved and it will stay that way.
+    if (!RetValAA.getState().isValidState()) {
+      LLVM_DEBUG(dbgs() << "[AAReturnedValues] Unresolved call: " << *CB
+                        << "\n");
+      UnresolvedCalls.insert(CB);
+      return;
+    }
+
+    // Do not try to learn partial information. If the callee has unresolved
+    // return values we will treat the call as unresolved/opaque.
+    auto &RetValAAUnresolvedCalls = RetValAA.getUnresolvedCalls();
+    if (!RetValAAUnresolvedCalls.empty()) {
+      UnresolvedCalls.insert(CB);
+      return;
+    }
+
+    // Now check if we can track transitively returned values. If possible, thus
+    // if all return value can be represented in the current scope, do so.
+    bool Unresolved = false;
+    for (auto &RetValAAIt : RetValAA.returned_values()) {
+      Value *RetVal = RetValAAIt.first;
+      if (isa<Argument>(RetVal) || isa<CallBase>(RetVal) ||
+          isa<Constant>(RetVal))
+        continue;
+      // Anything that did not fit in the above categories cannot be resolved,
+      // mark the call as unresolved.
+      LLVM_DEBUG(dbgs() << "[AAReturnedValues] transitively returned value "
+                           "cannot be translated: "
+                        << *RetVal << "\n");
+      UnresolvedCalls.insert(CB);
+      Unresolved = true;
+      break;
+    }
+
+    if (Unresolved)
+      return;
+
+    // Now track transitively returned values.
+    unsigned &NumRetAA = NumReturnedValuesPerKnownAA[CB];
+    if (NumRetAA == RetValAA.getNumReturnValues()) {
+      LLVM_DEBUG(dbgs() << "[AAReturnedValues] Skip call as it has not "
+                           "changed since it was seen last\n");
+      return;
+    }
+    NumRetAA = RetValAA.getNumReturnValues();
+
+    for (auto &RetValAAIt : RetValAA.returned_values()) {
+      Value *RetVal = RetValAAIt.first;
+      if (Argument *Arg = dyn_cast<Argument>(RetVal)) {
+        // Arguments are mapped to call site operands and we begin the traversal
+        // again.
+        bool Unused = false;
+        RVState RVS({NewRVsMap, Unused, RetValAAIt.second});
+        VisitReturnedValue(*CB->getArgOperand(Arg->getArgNo()), RVS, CB);
+        continue;
+      }
+      if (isa<CallBase>(RetVal)) {
+        // Call sites are resolved by the callee attribute over time, no need to
+        // do anything for us.
+        continue;
+      }
+      if (isa<Constant>(RetVal)) {
+        // Constants are valid everywhere, we can simply take them.
+        NewRVsMap[RetVal].insert(RIs.begin(), RIs.end());
+        continue;
+      }
+    }
+  };
+
+  for (auto &It : ReturnedValues)
+    HandleReturnValue(It.first, It.second);
+
+  // Because processing the new information can again lead to new return values
+  // we have to be careful and iterate until this iteration is complete. The
+  // idea is that we are in a stable state at the end of an update. All return
+  // values have been handled and properly categorized. We might not update
+  // again if we have not requested a non-fix attribute so we cannot "wait" for
+  // the next update to analyze a new return value.
+  while (!NewRVsMap.empty()) {
+    auto It = std::move(NewRVsMap.back());
+    NewRVsMap.pop_back();
+
+    assert(!It.second.empty() && "Entry does not add anything.");
+    auto &ReturnInsts = ReturnedValues[It.first];
+    for (ReturnInst *RI : It.second)
+      if (ReturnInsts.insert(RI)) {
+        LLVM_DEBUG(dbgs() << "[AAReturnedValues] Add new returned value "
+                          << *It.first << " => " << *RI << "\n");
+        HandleReturnValue(It.first, ReturnInsts);
+        Changed = true;
+      }
+  }
+
+  Changed |= (NumUnresolvedCalls != UnresolvedCalls.size());
+  return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
 }
 
 struct AAReturnedValuesFunction final : public AAReturnedValuesImpl {
@@ -1190,11 +1342,8 @@ ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) {
     return !cast<CallBase>(I).isConvergent();
   };
 
-  bool UsedAssumedInformation = false;
-  if (!A.checkForAllReadWriteInstructions(CheckRWInstForNoSync, *this,
-                                          UsedAssumedInformation) ||
-      !A.checkForAllCallLikeInstructions(CheckForNoSync, *this,
-                                         UsedAssumedInformation))
+  if (!A.checkForAllReadWriteInstructions(CheckRWInstForNoSync, *this) ||
+      !A.checkForAllCallLikeInstructions(CheckForNoSync, *this))
     return indicatePessimisticFixpoint();
 
   return ChangeStatus::UNCHANGED;
@@ -1254,9 +1403,7 @@ struct AANoFreeImpl : public AANoFree {
       return NoFreeAA.isAssumedNoFree();
     };
 
-    bool UsedAssumedInformation = false;
-    if (!A.checkForAllCallLikeInstructions(CheckForNoFree, *this,
-                                           UsedAssumedInformation))
+    if (!A.checkForAllCallLikeInstructions(CheckForNoFree, *this))
       return indicatePessimisticFixpoint();
     return ChangeStatus::UNCHANGED;
   }
@@ -1601,8 +1748,8 @@ struct AANonNullFloating : public AANonNullImpl {
     };
 
     StateType T;
-    if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T,
-                                          VisitValueCB, getCtxI()))
+    if (!genericValueTraversal<AANonNull, StateType>(
+            A, getIRPosition(), *this, T, VisitValueCB, getCtxI()))
       return indicatePessimisticFixpoint();
 
     return clampStateAndIndicateChange(getState(), T);
@@ -1718,9 +1865,7 @@ struct AANoRecurseFunction final : AANoRecurseImpl {
       return true;
     };
 
-    bool UsedAssumedInformation = false;
-    if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this,
-                                           UsedAssumedInformation))
+    if (!A.checkForAllCallLikeInstructions(CheckForNoRecurse, *this))
       return indicatePessimisticFixpoint();
     return ChangeStatus::UNCHANGED;
   }
@@ -1777,8 +1922,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
       // If we reach here, we know we have an instruction
       // that accesses memory through a pointer operand,
       // for which getPointerOperand() should give it to us.
-      Value *PtrOp =
-          const_cast<Value *>(getPointerOperand(&I, /* AllowVolatile */ true));
+      const Value *PtrOp = getPointerOperand(&I, /* AllowVolatile */ true);
       assert(PtrOp &&
              "Expected pointer operand of memory accessing instruction");
 
@@ -1820,7 +1964,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
         return true;
 
       // We know we have a branch instruction.
-      auto *BrInst = cast<BranchInst>(&I);
+      auto BrInst = cast<BranchInst>(&I);
 
       // Unconditional branches are never considered UB.
       if (BrInst->isUnconditional())
@@ -1927,24 +2071,20 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
           return true;
         };
 
-    bool UsedAssumedInformation = false;
     A.checkForAllInstructions(InspectMemAccessInstForUB, *this,
                               {Instruction::Load, Instruction::Store,
                                Instruction::AtomicCmpXchg,
                                Instruction::AtomicRMW},
-                              UsedAssumedInformation,
                               /* CheckBBLivenessOnly */ true);
     A.checkForAllInstructions(InspectBrInstForUB, *this, {Instruction::Br},
-                              UsedAssumedInformation,
                               /* CheckBBLivenessOnly */ true);
-    A.checkForAllCallLikeInstructions(InspectCallSiteForUB, *this,
-                                      UsedAssumedInformation);
+    A.checkForAllCallLikeInstructions(InspectCallSiteForUB, *this);
 
     // If the returned position of the anchor scope has noundef attriubte, check
     // all returned instructions.
     if (!getAnchorScope()->getReturnType()->isVoidTy()) {
       const IRPosition &ReturnIRP = IRPosition::returned(*getAnchorScope());
-      if (!A.isAssumedDead(ReturnIRP, this, nullptr, UsedAssumedInformation)) {
+      if (!A.isAssumedDead(ReturnIRP, this, nullptr)) {
         auto &RetPosNoUndefAA =
             A.getAAFor<AANoUndef>(*this, ReturnIRP, DepClassTy::NONE);
         if (RetPosNoUndefAA.isKnownNoUndef())
@@ -2041,27 +2181,27 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
   // action was taken and the caller should stop.
   // Otherwise, we return the simplified value that the caller should
   // use for specific processing.
-  Optional<Value *> stopOnUndefOrAssumed(Attributor &A, Value *V,
+  Optional<Value *> stopOnUndefOrAssumed(Attributor &A, const Value *V,
                                          Instruction *I) {
     bool UsedAssumedInformation = false;
     Optional<Value *> SimplifiedV = A.getAssumedSimplified(
         IRPosition::value(*V), *this, UsedAssumedInformation);
-    if (!UsedAssumedInformation) {
+    if (UsedAssumedInformation) {
       // Don't depend on assumed values.
-      if (!SimplifiedV.hasValue()) {
-        // If it is known (which we tested above) but it doesn't have a value,
-        // then we can assume `undef` and hence the instruction is UB.
-        KnownUBInsts.insert(I);
-        return llvm::None;
-      }
-      if (*SimplifiedV != nullptr)
-        V = *SimplifiedV;
+      return llvm::None;
     }
-    if (isa<UndefValue>(V)) {
+    if (!SimplifiedV.hasValue()) {
+      // If it is known (which we tested above) but it doesn't have a value,
+      // then we can assume `undef` and hence the instruction is UB.
       KnownUBInsts.insert(I);
       return llvm::None;
     }
-    return V;
+    Value *Val = SimplifiedV.getValue();
+    if (isa<UndefValue>(Val)) {
+      KnownUBInsts.insert(I);
+      return llvm::None;
+    }
+    return Val;
   }
 };
 
@@ -2162,9 +2302,7 @@ struct AAWillReturnImpl : public AAWillReturn {
       return NoRecurseAA.isAssumedNoRecurse();
     };
 
-    bool UsedAssumedInformation = false;
-    if (!A.checkForAllCallLikeInstructions(CheckForWillReturn, *this,
-                                           UsedAssumedInformation))
+    if (!A.checkForAllCallLikeInstructions(CheckForWillReturn, *this))
       return indicatePessimisticFixpoint();
 
     return ChangeStatus::UNCHANGED;
@@ -2662,19 +2800,6 @@ struct AAIsDeadValueImpl : public AAIsDead {
 
   /// Check if all uses are assumed dead.
   bool areAllUsesAssumedDead(Attributor &A, Value &V) {
-    // Callers might not check the type, void has no uses.
-    if (V.getType()->isVoidTy())
-      return true;
-
-    // If we replace a value with a constant there are no uses left afterwards.
-    if (!isa<Constant>(V)) {
-      bool UsedAssumedInformation = false;
-      Optional<Constant *> C =
-          A.getAssumedConstant(V, *this, UsedAssumedInformation);
-      if (!C.hasValue() || *C)
-        return true;
-    }
-
     auto UsePred = [&](const Use &U, bool &Follow) { return false; };
     // Explicitly set the dependence class to required because we want a long
     // chain of N dependent instructions to be considered live as soon as one is
@@ -2732,6 +2857,7 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
     Instruction *I = dyn_cast<Instruction>(&getAssociatedValue());
     if (!isAssumedSideEffectFree(A, I))
       return indicatePessimisticFixpoint();
+
     if (!areAllUsesAssumedDead(A, getAssociatedValue()))
       return indicatePessimisticFixpoint();
     return ChangeStatus::UNCHANGED;
@@ -2869,6 +2995,7 @@ struct AAIsDeadCallSiteReturned : public AAIsDeadFloating {
       IsAssumedSideEffectFree = false;
       Changed = ChangeStatus::CHANGED;
     }
+
     if (!areAllUsesAssumedDead(A, getAssociatedValue()))
       return indicatePessimisticFixpoint();
     return Changed;
@@ -2900,9 +3027,8 @@ struct AAIsDeadReturned : public AAIsDeadValueImpl {
   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override {
 
-    bool UsedAssumedInformation = false;
     A.checkForAllInstructions([](Instruction &) { return true; }, *this,
-                              {Instruction::Ret}, UsedAssumedInformation);
+                              {Instruction::Ret});
 
     auto PredForCallSite = [&](AbstractCallSite ACS) {
       if (ACS.isCallbackCall() || !ACS.getInstruction())
@@ -2929,9 +3055,7 @@ struct AAIsDeadReturned : public AAIsDeadValueImpl {
         AnyChange |= A.changeUseAfterManifest(RI.getOperandUse(0), UV);
       return true;
     };
-    bool UsedAssumedInformation = false;
-    A.checkForAllInstructions(RetInstPred, *this, {Instruction::Ret},
-                              UsedAssumedInformation);
+    A.checkForAllInstructions(RetInstPred, *this, {Instruction::Ret});
     return AnyChange ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
   }
 
@@ -3499,8 +3623,8 @@ struct AADereferenceableFloating : AADereferenceableImpl {
     };
 
     DerefState T;
-    if (!genericValueTraversal<DerefState>(A, getIRPosition(), *this, T,
-                                           VisitValueCB, getCtxI()))
+    if (!genericValueTraversal<AADereferenceable, DerefState>(
+            A, getIRPosition(), *this, T, VisitValueCB, getCtxI()))
       return indicatePessimisticFixpoint();
 
     return clampStateAndIndicateChange(getState(), T);
@@ -3765,8 +3889,8 @@ struct AAAlignFloating : AAAlignImpl {
     };
 
     StateType T;
-    if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T,
-                                          VisitValueCB, getCtxI()))
+    if (!genericValueTraversal<AAAlign, StateType>(A, getIRPosition(), *this, T,
+                                                   VisitValueCB, getCtxI()))
       return indicatePessimisticFixpoint();
 
     // TODO: If we know we visited all incoming values, thus no are assumed
@@ -3892,10 +4016,8 @@ struct AANoReturnImpl : public AANoReturn {
   /// See AbstractAttribute::updateImpl(Attributor &A).
   virtual ChangeStatus updateImpl(Attributor &A) override {
     auto CheckForNoReturn = [](Instruction &) { return false; };
-    bool UsedAssumedInformation = false;
     if (!A.checkForAllInstructions(CheckForNoReturn, *this,
-                                   {(unsigned)Instruction::Ret},
-                                   UsedAssumedInformation))
+                                   {(unsigned)Instruction::Ret}))
       return indicatePessimisticFixpoint();
     return ChangeStatus::UNCHANGED;
   }
@@ -4157,10 +4279,8 @@ struct AACaptureUseTracker final : public CaptureTracker {
   /// See CaptureTracker::shouldExplore(...).
   bool shouldExplore(const Use *U) override {
     // Check liveness and ignore droppable users.
-    bool UsedAssumedInformation = false;
     return !U->getUser()->isDroppable() &&
-           !A.isAssumedDead(*U, &NoCaptureAA, &IsDeadAA,
-                            UsedAssumedInformation);
+           !A.isAssumedDead(*U, &NoCaptureAA, &IsDeadAA);
   }
 
   /// Update the state according to \p CapturedInMem, \p CapturedInInt, and
@@ -4413,7 +4533,7 @@ struct AAValueSimplifyImpl : AAValueSimplify {
   const std::string getAsStr() const override {
     LLVM_DEBUG({
       errs() << "SAV: " << SimplifiedAssociatedValue << " ";
-      if (SimplifiedAssociatedValue && *SimplifiedAssociatedValue)
+      if (SimplifiedAssociatedValue)
         errs() << "SAV: " << **SimplifiedAssociatedValue << " ";
     });
     return isValidState() ? (isAtFixpoint() ? "simplified" : "maybe-simple")
@@ -4425,38 +4545,18 @@ struct AAValueSimplifyImpl : AAValueSimplify {
 
   /// See AAValueSimplify::getAssumedSimplifiedValue()
   Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const override {
+    if (!isValidState())
+      return const_cast<Value *>(&getAssociatedValue());
     return SimplifiedAssociatedValue;
   }
 
-  /// Return a value we can use as replacement for the associated one, or
-  /// nullptr if we don't have one that makes sense.
-  Value *getReplacementValue(Attributor &A) const {
-    Value *NewV;
-    NewV = SimplifiedAssociatedValue.hasValue()
-               ? SimplifiedAssociatedValue.getValue()
-               : UndefValue::get(getAssociatedType());
-    if (!NewV)
-      return nullptr;
-    NewV = AA::getWithType(*NewV, *getAssociatedType());
-    if (!NewV || NewV == &getAssociatedValue())
-      return nullptr;
-    const Instruction *CtxI = getCtxI();
-    if (CtxI && !AA::isValidAtPosition(*NewV, *CtxI, A.getInfoCache()))
-      return nullptr;
-    if (!CtxI && !AA::isValidInScope(*NewV, getAnchorScope()))
-      return nullptr;
-    return NewV;
-  }
-
   /// Helper function for querying AAValueSimplify and updating candicate.
   /// \param IRP The value position we are trying to unify with SimplifiedValue
   bool checkAndUpdate(Attributor &A, const AbstractAttribute &QueryingAA,
-                      const IRPosition &IRP, bool Simplify = true) {
+                      const IRPosition &IRP) {
     bool UsedAssumedInformation = false;
-    Optional<Value *> QueryingValueSimplified = &IRP.getAssociatedValue();
-    if (Simplify)
-      QueryingValueSimplified =
-          A.getAssumedSimplified(IRP, QueryingAA, UsedAssumedInformation);
+    Optional<Value *> QueryingValueSimplified =
+        A.getAssumedSimplified(IRP, QueryingAA, UsedAssumedInformation);
     return unionAssumed(QueryingValueSimplified);
   }
 
@@ -4495,14 +4595,24 @@ struct AAValueSimplifyImpl : AAValueSimplify {
   /// See AbstractAttribute::manifest(...).
   ChangeStatus manifest(Attributor &A) override {
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
-    if (getAssociatedValue().user_empty())
+
+    if (SimplifiedAssociatedValue.hasValue() &&
+        !SimplifiedAssociatedValue.getValue())
       return Changed;
 
-    if (auto *NewV = getReplacementValue(A)) {
-      LLVM_DEBUG(dbgs() << "[ValueSimplify] " << getAssociatedValue() << " -> "
-                        << *NewV << " :: " << *this << "\n");
-      if (A.changeValueAfterManifest(getAssociatedValue(), *NewV))
-        Changed = ChangeStatus::CHANGED;
+    Value &V = getAssociatedValue();
+    auto *C = SimplifiedAssociatedValue.hasValue()
+                  ? dyn_cast<Constant>(SimplifiedAssociatedValue.getValue())
+                  : UndefValue::get(V.getType());
+    if (C && C != &V && !V.user_empty()) {
+      Value *NewV = AA::getWithType(*C, *V.getType());
+      // We can replace the AssociatedValue with the constant.
+      if (NewV && NewV != &V) {
+        LLVM_DEBUG(dbgs() << "[ValueSimplify] " << V << " -> " << *NewV
+                          << " :: " << *this << "\n");
+        if (A.changeValueAfterManifest(V, *NewV))
+          Changed = ChangeStatus::CHANGED;
+      }
     }
 
     return Changed | AAValueSimplify::manifest(A);
@@ -4510,8 +4620,11 @@ struct AAValueSimplifyImpl : AAValueSimplify {
 
   /// See AbstractState::indicatePessimisticFixpoint(...).
   ChangeStatus indicatePessimisticFixpoint() override {
+    // NOTE: Associated value will be returned in a pessimistic fixpoint and is
+    // regarded as known. That's why`indicateOptimisticFixpoint` is called.
     SimplifiedAssociatedValue = &getAssociatedValue();
-    return AAValueSimplify::indicatePessimisticFixpoint();
+    indicateOptimisticFixpoint();
+    return ChangeStatus::CHANGED;
   }
 };
 
@@ -4562,27 +4675,14 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
       if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
         return false;
 
-      // Simplify the argument operand explicitly and check if the result is
-      // valid in the current scope. This avoids refering to simplified values
-      // in other functions, e.g., we don't want to say a an argument in a
-      // static function is actually an argument in a 
diff erent function.
-      Value &ArgOp = ACSArgPos.getAssociatedValue();
-      bool UsedAssumedInformation = false;
-      Optional<Value *> SimpleArgOp =
-          A.getAssumedSimplified(ACSArgPos, *this, UsedAssumedInformation);
-      if (!SimpleArgOp.hasValue())
-        return true;
-      Value *SimpleArgOpVal = *SimpleArgOp ? *SimpleArgOp : &ArgOp;
-      if (!AA::isValidInScope(*SimpleArgOpVal, getAnchorScope()))
-        return false;
-
       // We can only propagate thread independent values through callbacks.
       // This is 
diff erent to direct/indirect call sites because for them we
       // know the thread executing the caller and callee is the same. For
       // callbacks this is not guaranteed, thus a thread dependent value could
       // be 
diff erent for the caller and callee, making it invalid to propagate.
+      Value &ArgOp = ACSArgPos.getAssociatedValue();
       if (ACS.isCallbackCall())
-        if (auto *C = dyn_cast<Constant>(SimpleArgOpVal))
+        if (auto *C = dyn_cast<Constant>(&ArgOp))
           if (C->isThreadDependent())
             return false;
       return checkAndUpdate(A, *this, ACSArgPos);
@@ -4617,13 +4717,6 @@ struct AAValueSimplifyReturned : AAValueSimplifyImpl {
   AAValueSimplifyReturned(const IRPosition &IRP, Attributor &A)
       : AAValueSimplifyImpl(IRP, A) {}
 
-  /// See AAValueSimplify::getAssumedSimplifiedValue()
-  Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const override {
-    if (!isValidState())
-      return nullptr;
-    return SimplifiedAssociatedValue;
-  }
-
   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override {
     auto Before = SimplifiedAssociatedValue;
@@ -4645,25 +4738,36 @@ struct AAValueSimplifyReturned : AAValueSimplifyImpl {
   ChangeStatus manifest(Attributor &A) override {
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
 
-    if (auto *NewV = getReplacementValue(A)) {
-      auto PredForReturned =
-          [&](Value &, const SmallSetVector<ReturnInst *, 4> &RetInsts) {
-            for (ReturnInst *RI : RetInsts) {
-              Value *ReturnedVal = RI->getReturnValue();
-              if (ReturnedVal == NewV || isa<UndefValue>(ReturnedVal))
-                return true;
-              assert(RI->getFunction() == getAnchorScope() &&
-                     "ReturnInst in wrong function!");
-              LLVM_DEBUG(dbgs()
-                         << "[ValueSimplify] " << *ReturnedVal << " -> "
-                         << *NewV << " in " << *RI << " :: " << *this << "\n");
-              if (A.changeUseAfterManifest(RI->getOperandUse(0), *NewV))
-                Changed = ChangeStatus::CHANGED;
-            }
+    if (SimplifiedAssociatedValue.hasValue() &&
+        !SimplifiedAssociatedValue.getValue())
+      return Changed | AAValueSimplify::manifest(A);
+
+    auto *C = SimplifiedAssociatedValue.hasValue()
+                  ? dyn_cast<Constant>(SimplifiedAssociatedValue.getValue())
+                  : UndefValue::get(getAssociatedType());
+    if (!C || C == &getAssociatedValue())
+      return Changed | AAValueSimplify::manifest(A);
+
+    auto PredForReturned =
+        [&](Value &V, const SmallSetVector<ReturnInst *, 4> &RetInsts) {
+          // We can replace the AssociatedValue with the constant.
+          if (&V == C || isa<UndefValue>(V))
             return true;
-          };
-      A.checkForAllReturnedValuesAndReturnInsts(PredForReturned, *this);
-    }
+
+          for (ReturnInst *RI : RetInsts) {
+            if (RI->getFunction() != getAnchorScope())
+              continue;
+            Value *NewV = AA::getWithType(*C, *RI->getReturnValue()->getType());
+            if (!NewV)
+              continue;
+            LLVM_DEBUG(dbgs() << "[ValueSimplify] " << V << " -> " << *NewV
+                              << " in " << *RI << " :: " << *this << "\n");
+            if (A.changeUseAfterManifest(RI->getOperandUse(0), *NewV))
+              Changed = ChangeStatus::CHANGED;
+          }
+          return true;
+        };
+    A.checkForAllReturnedValuesAndReturnInsts(PredForReturned, *this);
 
     return Changed | AAValueSimplify::manifest(A);
   }
@@ -4779,9 +4883,9 @@ struct AAValueSimplifyFloating : AAValueSimplifyImpl {
     };
 
     bool Dummy = false;
-    if (!genericValueTraversal<bool>(A, getIRPosition(), *this, Dummy,
-                                     VisitValueCB, getCtxI(),
-                                     /* UseValueSimplify */ false))
+    if (!genericValueTraversal<AAValueSimplify, bool>(
+            A, getIRPosition(), *this, Dummy, VisitValueCB, getCtxI(),
+            /* UseValueSimplify */ false))
       if (!askSimplifiedValueForOtherAAs(A))
         return indicatePessimisticFixpoint();
 
@@ -4802,7 +4906,7 @@ struct AAValueSimplifyFunction : AAValueSimplifyImpl {
 
   /// See AbstractAttribute::initialize(...).
   void initialize(Attributor &A) override {
-    SimplifiedAssociatedValue = nullptr;
+    SimplifiedAssociatedValue = &getAnchorValue();
     indicateOptimisticFixpoint();
   }
   /// See AbstractAttribute::initialize(...).
@@ -4825,43 +4929,19 @@ struct AAValueSimplifyCallSite : AAValueSimplifyFunction {
   }
 };
 
-struct AAValueSimplifyCallSiteReturned : AAValueSimplifyImpl {
+struct AAValueSimplifyCallSiteReturned : AAValueSimplifyReturned {
   AAValueSimplifyCallSiteReturned(const IRPosition &IRP, Attributor &A)
-      : AAValueSimplifyImpl(IRP, A) {}
-
-  void initialize(Attributor &A) override {
-    if (!getAssociatedFunction())
-      indicatePessimisticFixpoint();
-  }
+      : AAValueSimplifyReturned(IRP, A) {}
 
-  /// See AbstractAttribute::updateImpl(...).
-  ChangeStatus updateImpl(Attributor &A) override {
-    auto Before = SimplifiedAssociatedValue;
-    auto &RetAA = A.getAAFor<AAReturnedValues>(
-        *this, IRPosition::function(*getAssociatedFunction()),
-        DepClassTy::REQUIRED);
-    auto PredForReturned =
-        [&](Value &RetVal, const SmallSetVector<ReturnInst *, 4> &RetInsts) {
-          bool UsedAssumedInformation = false;
-          Optional<Value *> CSRetVal = A.translateArgumentToCallSiteContent(
-              &RetVal, *cast<CallBase>(getCtxI()), *this,
-              UsedAssumedInformation);
-          SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice(
-              SimplifiedAssociatedValue, CSRetVal, getAssociatedType());
-          return SimplifiedAssociatedValue != Optional<Value *>(nullptr);
-        };
-    if (!RetAA.checkForAllReturnedValuesAndReturnInsts(PredForReturned))
-      if (!askSimplifiedValueForOtherAAs(A))
-        return indicatePessimisticFixpoint();
-    return Before == SimplifiedAssociatedValue ? ChangeStatus::UNCHANGED
-                                               : ChangeStatus ::CHANGED;
+  /// See AbstractAttribute::manifest(...).
+  ChangeStatus manifest(Attributor &A) override {
+    return AAValueSimplifyImpl::manifest(A);
   }
 
   void trackStatistics() const override {
     STATS_DECLTRACK_CSRET_ATTR(value_simplify)
   }
 };
-
 struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
   AAValueSimplifyCallSiteArgument(const IRPosition &IRP, Attributor &A)
       : AAValueSimplifyFloating(IRP, A) {}
@@ -4870,11 +4950,23 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
   ChangeStatus manifest(Attributor &A) override {
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
 
-    if (auto *NewV = getReplacementValue(A)) {
+    if (SimplifiedAssociatedValue.hasValue() &&
+        !SimplifiedAssociatedValue.getValue())
+      return Changed;
+
+    Value &V = getAssociatedValue();
+    auto *C = SimplifiedAssociatedValue.hasValue()
+                  ? dyn_cast<Constant>(SimplifiedAssociatedValue.getValue())
+                  : UndefValue::get(V.getType());
+    if (C) {
       Use &U = cast<CallBase>(&getAnchorValue())
                    ->getArgOperandUse(getCallSiteArgNo());
-      if (A.changeUseAfterManifest(U, *NewV))
-        Changed = ChangeStatus::CHANGED;
+      // We can replace the AssociatedValue with the constant.
+      if (&V != C) {
+        if (Value *NewV = AA::getWithType(*C, *V.getType()))
+          if (A.changeUseAfterManifest(U, *NewV))
+            Changed = ChangeStatus::CHANGED;
+      }
     }
 
     return Changed | AAValueSimplify::manifest(A);
@@ -4886,119 +4978,23 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating {
 };
 
 /// ----------------------- Heap-To-Stack Conversion ---------------------------
-struct AAHeapToStackFunction final : public AAHeapToStack {
-
-  struct AllocationInfo {
-    /// The call that allocates the memory.
-    CallBase *CB;
-
-    /// The kind of allocation.
-    const enum class AllocationKind {
-      MALLOC,
-      CALLOC,
-      ALIGNED_ALLOC,
-    } Kind;
-
-    /// The library function id for the allocation.
-    LibFunc LibraryFunctionId;
-
-    /// The status wrt. a rewrite.
-    enum {
-      STACK_DUE_TO_USE,
-      STACK_DUE_TO_FREE,
-      INVALID,
-    } Status = STACK_DUE_TO_USE;
-
-    /// Flag to indicate if we encountered a use that might free this allocation
-    /// but which is not in the deallocation infos.
-    bool HasPotentiallyFreeingUnknownUses = false;
-
-    /// The set of free calls that use this allocation.
-    SmallPtrSet<CallBase *, 1> PotentialFreeCalls;
-  };
-
-  struct DeallocationInfo {
-    /// The call that deallocates the memory.
-    CallBase *CB;
-
-    /// Flag to indicate if we don't know all objects this deallocation might
-    /// free..
-    bool MightFreeUnknownObjects = false;
-
-    /// The set of allocation calls that are potentially freed.
-    SmallPtrSet<CallBase *, 1> PotentialAllocationCalls;
-  };
-
-  AAHeapToStackFunction(const IRPosition &IRP, Attributor &A)
+struct AAHeapToStackImpl : public AAHeapToStack {
+  AAHeapToStackImpl(const IRPosition &IRP, Attributor &A)
       : AAHeapToStack(IRP, A) {}
 
-  void initialize(Attributor &A) override {
-    AAHeapToStack::initialize(A);
-
-    const Function *F = getAnchorScope();
-    const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
-
-    auto AllocationIdentifierCB = [&](Instruction &I) {
-      CallBase *CB = dyn_cast<CallBase>(&I);
-      if (!CB)
-        return true;
-      if (isFreeCall(CB, TLI)) {
-        DeallocationInfos[CB] = new (A.Allocator) DeallocationInfo{CB};
-        return true;
-      }
-      bool IsMalloc = isMallocLikeFn(CB, TLI);
-      bool IsAlignedAllocLike = !IsMalloc && isAlignedAllocLikeFn(CB, TLI);
-      bool IsCalloc =
-          !IsMalloc && !IsAlignedAllocLike && isCallocLikeFn(CB, TLI);
-      if (!IsMalloc && !IsAlignedAllocLike && !IsCalloc)
-        return true;
-      auto Kind =
-          IsMalloc ? AllocationInfo::AllocationKind::MALLOC
-                   : (IsCalloc ? AllocationInfo::AllocationKind::CALLOC
-                               : AllocationInfo::AllocationKind::ALIGNED_ALLOC);
-
-      AllocationInfo *AI = new (A.Allocator) AllocationInfo{CB, Kind};
-      AllocationInfos[CB] = AI;
-      TLI->getLibFunc(*CB, AI->LibraryFunctionId);
-      return true;
-    };
-
-    bool UsedAssumedInformation = false;
-    bool Success = A.checkForAllCallLikeInstructions(
-        AllocationIdentifierCB, *this, UsedAssumedInformation,
-        /* CheckBBLivenessOnly */ false,
-        /* CheckPotentiallyDead */ true);
-    (void)Success;
-    assert(Success && "Did not expect the call base visit callback to fail!");
-  }
-
   const std::string getAsStr() const override {
-    unsigned NumH2SMallocs = 0, NumInvalidMallocs = 0;
-    for (const auto &It : AllocationInfos) {
-      if (It.second->Status == AllocationInfo::INVALID)
-        ++NumInvalidMallocs;
-      else
-        ++NumH2SMallocs;
-    }
-    return "[H2S] Mallocs Good/Bad: " + std::to_string(NumH2SMallocs) + "/" +
-           std::to_string(NumInvalidMallocs);
+    return "[H2S] Mallocs Good/Bad: " + std::to_string(MallocCalls.size()) +
+           "/" + std::to_string(BadMallocCalls.size());
   }
 
-  /// See AbstractAttribute::trackStatistics().
-  void trackStatistics() const override {
-    STATS_DECL(
-        MallocCalls, Function,
-        "Number of malloc/calloc/aligned_alloc calls converted to allocas");
-    for (auto &It : AllocationInfos)
-      if (It.second->Status != AllocationInfo::INVALID)
-        ++BUILD_STAT_NAME(MallocCalls, Function);
+  bool isAssumedHeapToStack(CallBase &CB) const override {
+    return isValidState() && MallocCalls.contains(&CB) &&
+           !BadMallocCalls.count(&CB);
   }
 
-  bool isAssumedHeapToStack(CallBase &CB) const override {
-    if (isValidState())
-      if (AllocationInfo *AI = AllocationInfos.lookup(&CB))
-        return AI->Status != AllocationInfo::INVALID;
-    return false;
+  bool isKnownHeapToStack(CallBase &CB) const override {
+    return isValidState() && MallocCalls.contains(&CB) &&
+           !BadMallocCalls.count(&CB);
   }
 
   ChangeStatus manifest(Attributor &A) override {
@@ -5009,82 +5005,76 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
     Function *F = getAnchorScope();
     const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
 
-    for (auto &It : AllocationInfos) {
-      AllocationInfo &AI = *It.second;
-      if (AI.Status == AllocationInfo::INVALID)
+    for (Instruction *MallocCall : MallocCalls) {
+      // This malloc cannot be replaced.
+      if (BadMallocCalls.count(MallocCall))
         continue;
 
-      for (CallBase *FreeCall : AI.PotentialFreeCalls) {
+      for (Instruction *FreeCall : FreesForMalloc[MallocCall]) {
         LLVM_DEBUG(dbgs() << "H2S: Removing free call: " << *FreeCall << "\n");
         A.deleteAfterManifest(*FreeCall);
         HasChanged = ChangeStatus::CHANGED;
       }
 
-      LLVM_DEBUG(dbgs() << "H2S: Removing malloc-like call: " << *AI.CB
+      LLVM_DEBUG(dbgs() << "H2S: Removing malloc call: " << *MallocCall
                         << "\n");
 
       auto Remark = [&](OptimizationRemark OR) {
         LibFunc IsAllocShared;
-        if (TLI->getLibFunc(*AI.CB, IsAllocShared))
+        if (auto *CB = dyn_cast<CallBase>(MallocCall)) {
+          TLI->getLibFunc(*CB, IsAllocShared);
           if (IsAllocShared == LibFunc___kmpc_alloc_shared)
             return OR << "Moving globalized variable to the stack.";
+        }
         return OR << "Moving memory allocation from the heap to the stack.";
       };
-      A.emitRemark<OptimizationRemark>(AI.CB, "HeapToStack", Remark);
+      A.emitRemark<OptimizationRemark>(MallocCall, "HeapToStack", Remark);
 
+      Align Alignment;
       Value *Size;
-      Optional<APInt> SizeAPI = getSize(A, *this, AI);
-      if (SizeAPI.hasValue()) {
-        Size = ConstantInt::get(AI.CB->getContext(), *SizeAPI);
-      } else if (AI.Kind == AllocationInfo::AllocationKind::CALLOC) {
-        auto *Num = AI.CB->getOperand(0);
-        auto *SizeT = AI.CB->getOperand(1);
-        IRBuilder<> B(AI.CB);
+      if (isCallocLikeFn(MallocCall, TLI)) {
+        auto *Num = MallocCall->getOperand(0);
+        auto *SizeT = MallocCall->getOperand(1);
+        IRBuilder<> B(MallocCall);
         Size = B.CreateMul(Num, SizeT, "h2s.calloc.size");
-      } else if (AI.Kind == AllocationInfo::AllocationKind::ALIGNED_ALLOC) {
-        Size = AI.CB->getOperand(1);
+      } else if (isAlignedAllocLikeFn(MallocCall, TLI)) {
+        Size = MallocCall->getOperand(1);
+        Alignment = MaybeAlign(cast<ConstantInt>(MallocCall->getOperand(0))
+                                   ->getValue()
+                                   .getZExtValue())
+                        .valueOrOne();
       } else {
-        Size = AI.CB->getOperand(0);
-      }
-
-      Align Alignment(1);
-      if (AI.Kind == AllocationInfo::AllocationKind::ALIGNED_ALLOC) {
-        Optional<APInt> AlignmentAPI =
-            getAPInt(A, *this, *AI.CB->getArgOperand(0));
-        assert(AlignmentAPI.hasValue() &&
-               "Expected an alignment during manifest!");
-        Alignment =
-            max(Alignment, MaybeAlign(AlignmentAPI.getValue().getZExtValue()));
+        Size = MallocCall->getOperand(0);
       }
 
-      unsigned AS = cast<PointerType>(AI.CB->getType())->getAddressSpace();
-      Instruction *Alloca =
+      unsigned AS = cast<PointerType>(MallocCall->getType())->getAddressSpace();
+      Instruction *AI =
           new AllocaInst(Type::getInt8Ty(F->getContext()), AS, Size, Alignment,
-                         "", AI.CB->getNextNode());
+                         "", MallocCall->getNextNode());
 
-      if (Alloca->getType() != AI.CB->getType())
-        Alloca = new BitCastInst(Alloca, AI.CB->getType(), "malloc_bc",
-                                 Alloca->getNextNode());
+      if (AI->getType() != MallocCall->getType())
+        AI = new BitCastInst(AI, MallocCall->getType(), "malloc_bc",
+                             AI->getNextNode());
 
-      A.changeValueAfterManifest(*AI.CB, *Alloca);
+      A.changeValueAfterManifest(*MallocCall, *AI);
 
-      if (auto *II = dyn_cast<InvokeInst>(AI.CB)) {
+      if (auto *II = dyn_cast<InvokeInst>(MallocCall)) {
         auto *NBB = II->getNormalDest();
-        BranchInst::Create(NBB, AI.CB->getParent());
-        A.deleteAfterManifest(*AI.CB);
+        BranchInst::Create(NBB, MallocCall->getParent());
+        A.deleteAfterManifest(*MallocCall);
       } else {
-        A.deleteAfterManifest(*AI.CB);
+        A.deleteAfterManifest(*MallocCall);
       }
 
       // Zero out the allocated memory if it was a calloc.
-      if (AI.Kind == AllocationInfo::AllocationKind::CALLOC) {
-        auto *BI = new BitCastInst(Alloca, AI.CB->getType(), "calloc_bc",
-                                   Alloca->getNextNode());
+      if (isCallocLikeFn(MallocCall, TLI)) {
+        auto *BI = new BitCastInst(AI, MallocCall->getType(), "calloc_bc",
+                                   AI->getNextNode());
         Value *Ops[] = {
             BI, ConstantInt::get(F->getContext(), APInt(8, 0, false)), Size,
             ConstantInt::get(Type::getInt1Ty(F->getContext()), false)};
 
-        Type *Tys[] = {BI->getType(), AI.CB->getOperand(0)->getType()};
+        Type *Tys[] = {BI->getType(), MallocCall->getOperand(0)->getType()};
         Module *M = F->getParent();
         Function *Fn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
         CallInst::Create(Fn, Ops, "", BI->getNextNode());
@@ -5095,58 +5085,21 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
     return HasChanged;
   }
 
-  Optional<APInt> getAPInt(Attributor &A, const AbstractAttribute &AA,
-                           Value &V) {
-    bool UsedAssumedInformation = false;
-    Optional<Constant *> SimpleV =
-        A.getAssumedConstant(V, AA, UsedAssumedInformation);
-    if (!SimpleV.hasValue())
-      return APInt(64, 0);
-    if (auto *CI = dyn_cast_or_null<ConstantInt>(SimpleV.getValue()))
-      return CI->getValue();
-    return llvm::None;
-  }
-
-  Optional<APInt> getSize(Attributor &A, const AbstractAttribute &AA,
-                          AllocationInfo &AI) {
-
-    if (AI.Kind == AllocationInfo::AllocationKind::MALLOC)
-      return getAPInt(A, AA, *AI.CB->getArgOperand(0));
-
-    if (AI.Kind == AllocationInfo::AllocationKind::ALIGNED_ALLOC)
-      // Only if the alignment is also constant we return a size.
-      return getAPInt(A, AA, *AI.CB->getArgOperand(0)).hasValue()
-                 ? getAPInt(A, AA, *AI.CB->getArgOperand(1))
-                 : llvm::None;
-
-    assert(AI.Kind == AllocationInfo::AllocationKind::CALLOC &&
-           "Expected only callocs are left");
-    Optional<APInt> Num = getAPInt(A, AA, *AI.CB->getArgOperand(0));
-    Optional<APInt> Size = getAPInt(A, AA, *AI.CB->getArgOperand(1));
-    if (!Num.hasValue() || !Size.hasValue())
-      return llvm::None;
-    bool Overflow = false;
-    Size = Size.getValue().umul_ov(Num.getValue(), Overflow);
-    return Overflow ? llvm::None : Size;
-  }
+  /// Collection of all malloc calls in a function.
+  SmallSetVector<Instruction *, 4> MallocCalls;
 
-  /// Collection of all malloc-like calls in a function with associated
-  /// information.
-  DenseMap<CallBase *, AllocationInfo *> AllocationInfos;
+  /// Collection of malloc calls that cannot be converted.
+  DenseSet<const Instruction *> BadMallocCalls;
 
-  /// Collection of all free-like calls in a function with associated
-  /// information.
-  DenseMap<CallBase *, DeallocationInfo *> DeallocationInfos;
+  /// A map for each malloc call to the set of associated free calls.
+  DenseMap<Instruction *, SmallPtrSet<Instruction *, 4>> FreesForMalloc;
 
   ChangeStatus updateImpl(Attributor &A) override;
 };
 
-ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
-  ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ChangeStatus AAHeapToStackImpl::updateImpl(Attributor &A) {
   const Function *F = getAnchorScope();
-
-  const auto &LivenessAA =
-      A.getAAFor<AAIsDead>(*this, IRPosition::function(*F), DepClassTy::NONE);
+  const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
 
   MustBeExecutedContextExplorer &Explorer =
       A.getInfoCache().getMustBeExecutedContextExplorer();
@@ -5154,67 +5107,7 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
   bool StackIsAccessibleByOtherThreads =
       A.getInfoCache().stackIsAccessibleByOtherThreads();
 
-  // Flag to ensure we update our deallocation information at most once per
-  // updateImpl call and only if we use the free check reasoning.
-  bool HasUpdatedFrees = false;
-
-  auto UpdateFrees = [&]() {
-    HasUpdatedFrees = true;
-
-    for (auto &It : DeallocationInfos) {
-      DeallocationInfo &DI = *It.second;
-      // For now we cannot use deallocations that have unknown inputs, skip
-      // them.
-      if (DI.MightFreeUnknownObjects)
-        continue;
-
-      // No need to analyze dead calls, ignore them instead.
-      bool UsedAssumedInformation = false;
-      if (A.isAssumedDead(*DI.CB, this, &LivenessAA, UsedAssumedInformation,
-                          /* CheckBBLivenessOnly */ true))
-        continue;
-
-      // Use the optimistic version to get the freed objects, ignoring dead
-      // branches etc.
-      SmallVector<Value *, 8> Objects;
-      if (!getAssumedUnderlyingObjects(A, *DI.CB->getArgOperand(0), Objects,
-                                       *this, DI.CB)) {
-        LLVM_DEBUG(
-            dbgs()
-            << "[H2S] Unexpected failure in getAssumedUnderlyingObjects!\n");
-        DI.MightFreeUnknownObjects = true;
-        continue;
-      }
-
-      // Check each object explicitly.
-      for (auto *Obj : Objects) {
-        // Free of null and undef can be ignored as no-ops (or UB in the latter
-        // case).
-        if (isa<ConstantPointerNull>(Obj) || isa<UndefValue>(Obj))
-          continue;
-
-        CallBase *ObjCB = dyn_cast<CallBase>(Obj);
-        if (!ObjCB) {
-          LLVM_DEBUG(dbgs()
-                     << "[H2S] Free of a non-call object: " << *Obj << "\n");
-          DI.MightFreeUnknownObjects = true;
-          continue;
-        }
-
-        AllocationInfo *AI = AllocationInfos.lookup(ObjCB);
-        if (!AI) {
-          LLVM_DEBUG(dbgs() << "[H2S] Free of a non-allocation object: " << *Obj
-                            << "\n");
-          DI.MightFreeUnknownObjects = true;
-          continue;
-        }
-
-        DI.PotentialAllocationCalls.insert(ObjCB);
-      }
-    }
-  };
-
-  auto FreeCheck = [&](AllocationInfo &AI) {
+  auto FreeCheck = [&](Instruction &I) {
     // If the stack is not accessible by other threads, the "must-free" logic
     // doesn't apply as the pointer could be shared and needs to be places in
     // "shareable" memory.
@@ -5228,55 +5121,19 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
         return false;
       }
     }
-    if (!HasUpdatedFrees)
-      UpdateFrees();
-
-    // TODO: Allow multi exit functions that have 
diff erent free calls.
-    if (AI.PotentialFreeCalls.size() != 1) {
+    const auto &Frees = FreesForMalloc.lookup(&I);
+    if (Frees.size() != 1) {
       LLVM_DEBUG(dbgs() << "[H2S] did not find one free call but "
-                        << AI.PotentialFreeCalls.size() << "\n");
-      return false;
-    }
-    CallBase *UniqueFree = *AI.PotentialFreeCalls.begin();
-    DeallocationInfo *DI = DeallocationInfos.lookup(UniqueFree);
-    if (!DI) {
-      LLVM_DEBUG(
-          dbgs() << "[H2S] unique free call was not known as deallocation call "
-                 << *UniqueFree << "\n");
-      return false;
-    }
-    if (DI->MightFreeUnknownObjects) {
-      LLVM_DEBUG(
-          dbgs() << "[H2S] unique free call might free unkown allocations\n");
+                        << Frees.size() << "\n");
       return false;
     }
-    if (DI->PotentialAllocationCalls.size() > 1) {
-      LLVM_DEBUG(dbgs() << "[H2S] unique free call might free "
-                        << DI->PotentialAllocationCalls.size()
-                        << " 
diff erent allocations\n");
-      return false;
-    }
-    if (*DI->PotentialAllocationCalls.begin() != AI.CB) {
-      LLVM_DEBUG(
-          dbgs()
-          << "[H2S] unique free call not known to free this allocation but "
-          << **DI->PotentialAllocationCalls.begin() << "\n");
-      return false;
-    }
-    Instruction *CtxI = isa<InvokeInst>(AI.CB) ? AI.CB : AI.CB->getNextNode();
-    if (!Explorer.findInContextOf(UniqueFree, CtxI)) {
-      LLVM_DEBUG(
-          dbgs()
-          << "[H2S] unique free call might not be executed with the allocation "
-          << *UniqueFree << "\n");
-      return false;
-    }
-    return true;
+    Instruction *UniqueFree = *Frees.begin();
+    return Explorer.findInContextOf(UniqueFree, I.getNextNode());
   };
 
-  auto UsesCheck = [&](AllocationInfo &AI) {
+  auto UsesCheck = [&](Instruction &I) {
     bool ValidUsesOnly = true;
-
+    bool MustUse = true;
     auto Pred = [&](const Use &U, bool &Follow) -> bool {
       Instruction *UserI = cast<Instruction>(U.getUser());
       if (isa<LoadInst>(UserI))
@@ -5294,8 +5151,15 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
       if (auto *CB = dyn_cast<CallBase>(UserI)) {
         if (!CB->isArgOperand(&U) || CB->isLifetimeStartOrEnd())
           return true;
-        if (DeallocationInfos.count(CB)) {
-          AI.PotentialFreeCalls.insert(CB);
+        // Record malloc.
+        if (isFreeCall(UserI, TLI)) {
+          if (MustUse) {
+            FreesForMalloc[&I].insert(UserI);
+          } else {
+            LLVM_DEBUG(dbgs() << "[H2S] free potentially on 
diff erent mallocs: "
+                              << *UserI << "\n");
+            ValidUsesOnly = false;
+          }
           return true;
         }
 
@@ -5310,12 +5174,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
             *this, IRPosition::callsite_argument(*CB, ArgNo),
             DepClassTy::OPTIONAL);
 
-        bool MaybeCaptured = !NoCaptureAA.isAssumedNoCapture();
-        bool MaybeFreed = !ArgNoFreeAA.isAssumedNoFree();
-        if (MaybeCaptured ||
-            (AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared &&
-             MaybeFreed)) {
-          AI.HasPotentiallyFreeingUnknownUses |= MaybeFreed;
+        if (!NoCaptureAA.isAssumedNoCapture() ||
+            !ArgNoFreeAA.isAssumedNoFree()) {
 
           // Emit a missed remark if this is missed OpenMP globalization.
           auto Remark = [&](OptimizationRemarkMissed ORM) {
@@ -5326,9 +5186,13 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
                        << "Mark as noescape to override.";
           };
 
-          if (AI.LibraryFunctionId == LibFunc___kmpc_alloc_shared)
-            A.emitRemark<OptimizationRemarkMissed>(AI.CB, "HeapToStackFailed",
-                                                   Remark);
+          LibFunc IsAllocShared;
+          if (auto *AllocShared = dyn_cast<CallBase>(&I)) {
+            TLI->getLibFunc(*AllocShared, IsAllocShared);
+            if (IsAllocShared == LibFunc___kmpc_alloc_shared)
+              A.emitRemark<OptimizationRemarkMissed>(
+                  AllocShared, "HeapToStackFailed", Remark);
+          }
 
           LLVM_DEBUG(dbgs() << "[H2S] Bad user: " << *UserI << "\n");
           ValidUsesOnly = false;
@@ -5338,6 +5202,7 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
 
       if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) ||
           isa<PHINode>(UserI) || isa<SelectInst>(UserI)) {
+        MustUse &= !(isa<PHINode>(UserI) || isa<SelectInst>(UserI));
         Follow = true;
         return true;
       }
@@ -5347,64 +5212,96 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
       ValidUsesOnly = false;
       return true;
     };
-    A.checkForAllUses(Pred, *this, *AI.CB);
+    A.checkForAllUses(Pred, *this, I);
     return ValidUsesOnly;
   };
 
-  // The actual update starts here. We look at all allocations and depending on
-  // their status perform the appropriate check(s).
-  for (auto &It : AllocationInfos) {
-    AllocationInfo &AI = *It.second;
-    if (AI.Status == AllocationInfo::INVALID)
-      continue;
+  auto MallocCallocCheck = [&](Instruction &I) {
+    if (BadMallocCalls.count(&I))
+      return true;
 
-    if (MaxHeapToStackSize == -1) {
-      if (AI.Kind == AllocationInfo::AllocationKind::ALIGNED_ALLOC)
-        if (!getAPInt(A, *this, *AI.CB->getArgOperand(0)).hasValue()) {
-          LLVM_DEBUG(dbgs() << "[H2S] Unknown allocation alignment: " << *AI.CB
-                            << "\n");
-          AI.Status = AllocationInfo::INVALID;
-          Changed = ChangeStatus::CHANGED;
-          continue;
-        }
-    } else {
-      Optional<APInt> Size = getSize(A, *this, AI);
-      if (!Size.hasValue() || Size.getValue().ugt(MaxHeapToStackSize)) {
-        LLVM_DEBUG({
-          if (!Size.hasValue())
-            dbgs() << "[H2S] Unknown allocation size (or alignment): " << *AI.CB
-                   << "\n";
-          else
-            dbgs() << "[H2S] Allocation size too large: " << *AI.CB << " vs. "
-                   << MaxHeapToStackSize << "\n";
-        });
+    bool IsMalloc = isMallocLikeFn(&I, TLI);
+    bool IsAlignedAllocLike = isAlignedAllocLikeFn(&I, TLI);
+    bool IsCalloc = !IsMalloc && isCallocLikeFn(&I, TLI);
+    if (!IsMalloc && !IsAlignedAllocLike && !IsCalloc) {
+      BadMallocCalls.insert(&I);
+      return true;
+    }
 
-        AI.Status = AllocationInfo::INVALID;
-        Changed = ChangeStatus::CHANGED;
-        continue;
+    if (IsMalloc) {
+      if (MaxHeapToStackSize == -1) {
+        if (UsesCheck(I) || FreeCheck(I)) {
+          MallocCalls.insert(&I);
+          return true;
+        }
+      }
+      if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(0)))
+        if (Size->getValue().ule(MaxHeapToStackSize))
+          if (UsesCheck(I) || FreeCheck(I)) {
+            MallocCalls.insert(&I);
+            return true;
+          }
+    } else if (IsAlignedAllocLike && isa<ConstantInt>(I.getOperand(0))) {
+      if (MaxHeapToStackSize == -1) {
+        if (UsesCheck(I) || FreeCheck(I)) {
+          MallocCalls.insert(&I);
+          return true;
+        }
+      }
+      // Only if the alignment and sizes are constant.
+      if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
+        if (Size->getValue().ule(MaxHeapToStackSize))
+          if (UsesCheck(I) || FreeCheck(I)) {
+            MallocCalls.insert(&I);
+            return true;
+          }
+    } else if (IsCalloc) {
+      if (MaxHeapToStackSize == -1) {
+        if (UsesCheck(I) || FreeCheck(I)) {
+          MallocCalls.insert(&I);
+          return true;
+        }
       }
+      bool Overflow = false;
+      if (auto *Num = dyn_cast<ConstantInt>(I.getOperand(0)))
+        if (auto *Size = dyn_cast<ConstantInt>(I.getOperand(1)))
+          if ((Size->getValue().umul_ov(Num->getValue(), Overflow))
+                  .ule(MaxHeapToStackSize))
+            if (!Overflow && (UsesCheck(I) || FreeCheck(I))) {
+              MallocCalls.insert(&I);
+              return true;
+            }
     }
 
-    switch (AI.Status) {
-    case AllocationInfo::STACK_DUE_TO_USE:
-      if (UsesCheck(AI))
-        continue;
-      AI.Status = AllocationInfo::STACK_DUE_TO_FREE;
-      LLVM_FALLTHROUGH;
-    case AllocationInfo::STACK_DUE_TO_FREE:
-      if (FreeCheck(AI))
-        continue;
-      AI.Status = AllocationInfo::INVALID;
-      Changed = ChangeStatus::CHANGED;
-      continue;
-    case AllocationInfo::INVALID:
-      llvm_unreachable("Invalid allocations should never reach this point!");
-    };
-  }
+    BadMallocCalls.insert(&I);
+    return true;
+  };
 
-  return Changed;
+  size_t NumBadMallocs = BadMallocCalls.size();
+
+  A.checkForAllCallLikeInstructions(MallocCallocCheck, *this);
+
+  if (NumBadMallocs != BadMallocCalls.size())
+    return ChangeStatus::CHANGED;
+
+  return ChangeStatus::UNCHANGED;
 }
 
+struct AAHeapToStackFunction final : public AAHeapToStackImpl {
+  AAHeapToStackFunction(const IRPosition &IRP, Attributor &A)
+      : AAHeapToStackImpl(IRP, A) {}
+
+  /// See AbstractAttribute::trackStatistics().
+  void trackStatistics() const override {
+    STATS_DECL(
+        MallocCalls, Function,
+        "Number of malloc/calloc/aligned_alloc calls converted to allocas");
+    for (auto *C : MallocCalls)
+      if (!BadMallocCalls.count(C))
+        ++BUILD_STAT_NAME(MallocCalls, Function);
+  }
+};
+
 /// ----------------------- Privatizable Pointers ------------------------------
 struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {
   AAPrivatizablePtrImpl(const IRPosition &IRP, Attributor &A)
@@ -5796,7 +5693,6 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
     // escape into tail recursion.
     // TODO: Be smarter about new allocas escaping into tail calls.
     SmallVector<CallInst *, 16> TailCalls;
-    bool UsedAssumedInformation = false;
     if (!A.checkForAllInstructions(
             [&](Instruction &I) {
               CallInst &CI = cast<CallInst>(I);
@@ -5804,7 +5700,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
                 TailCalls.push_back(&CI);
               return true;
             },
-            *this, {Instruction::Call}, UsedAssumedInformation))
+            *this, {Instruction::Call}))
       return ChangeStatus::UNCHANGED;
 
     Argument *Arg = getAssociatedArgument();
@@ -6352,9 +6248,7 @@ ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) {
     return !isAtFixpoint();
   };
 
-  bool UsedAssumedInformation = false;
-  if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this,
-                                          UsedAssumedInformation))
+  if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this))
     return indicatePessimisticFixpoint();
 
   return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED
@@ -6406,13 +6300,10 @@ ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) {
   for (unsigned i = 0; i < Uses.size() && !isAtFixpoint(); i++) {
     const Use *U = Uses[i];
     Instruction *UserI = cast<Instruction>(U->getUser());
-    bool UsedAssumedInformation = false;
     LLVM_DEBUG(dbgs() << "[AAMemoryBehavior] Use: " << **U << " in " << *UserI
-                      << " [Dead: "
-                      << (A.isAssumedDead(*U, this, &LivenessAA,
-                                          UsedAssumedInformation))
+                      << " [Dead: " << (A.isAssumedDead(*U, this, &LivenessAA))
                       << "]\n");
-    if (A.isAssumedDead(*U, this, &LivenessAA, UsedAssumedInformation))
+    if (A.isAssumedDead(*U, this, &LivenessAA))
       continue;
 
     // Droppable users, e.g., llvm::assume does not actually perform any action.
@@ -6824,45 +6715,47 @@ void AAMemoryLocationImpl::categorizePtrValue(
                     << Ptr << " ["
                     << getMemoryLocationsAsStr(State.getAssumed()) << "]\n");
 
-  SmallVector<Value *, 8> Objects;
-  if (!getAssumedUnderlyingObjects(A, Ptr, Objects, *this, &I)) {
-    LLVM_DEBUG(
-        dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n");
-    updateStateAndAccessesMap(State, NO_UNKOWN_MEM, &I, nullptr, Changed,
-                              getAccessKindFromInst(&I));
-    return;
-  }
+  auto StripGEPCB = [](Value *V) -> Value * {
+    auto *GEP = dyn_cast<GEPOperator>(V);
+    while (GEP) {
+      V = GEP->getPointerOperand();
+      GEP = dyn_cast<GEPOperator>(V);
+    }
+    return V;
+  };
 
-  for (Value *Obj : Objects) {
+  auto VisitValueCB = [&](Value &V, const Instruction *,
+                          AAMemoryLocation::StateType &T,
+                          bool Stripped) -> bool {
     // TODO: recognize the TBAA used for constant accesses.
     MemoryLocationsKind MLK = NO_LOCATIONS;
-    assert(!isa<GEPOperator>(Obj) && "GEPs should have been stripped.");
-    if (isa<UndefValue>(Obj))
-      continue;
-    if (auto *Arg = dyn_cast<Argument>(Obj)) {
+    assert(!isa<GEPOperator>(V) && "GEPs should have been stripped.");
+    if (isa<UndefValue>(V))
+      return true;
+    if (auto *Arg = dyn_cast<Argument>(&V)) {
       if (Arg->hasByValAttr())
         MLK = NO_LOCAL_MEM;
       else
         MLK = NO_ARGUMENT_MEM;
-    } else if (auto *GV = dyn_cast<GlobalValue>(Obj)) {
+    } else if (auto *GV = dyn_cast<GlobalValue>(&V)) {
       // Reading constant memory is not treated as a read "effect" by the
       // function attr pass so we won't neither. Constants defined by TBAA are
       // similar. (We know we do not write it because it is constant.)
       if (auto *GVar = dyn_cast<GlobalVariable>(GV))
         if (GVar->isConstant())
-          continue;
+          return true;
 
       if (GV->hasLocalLinkage())
         MLK = NO_GLOBAL_INTERNAL_MEM;
       else
         MLK = NO_GLOBAL_EXTERNAL_MEM;
-    } else if (isa<ConstantPointerNull>(Obj) &&
+    } else if (isa<ConstantPointerNull>(V) &&
                !NullPointerIsDefined(getAssociatedFunction(),
-                                     Ptr.getType()->getPointerAddressSpace())) {
-      continue;
-    } else if (isa<AllocaInst>(Obj)) {
+                                     V.getType()->getPointerAddressSpace())) {
+      return true;
+    } else if (isa<AllocaInst>(V)) {
       MLK = NO_LOCAL_MEM;
-    } else if (const auto *CB = dyn_cast<CallBase>(Obj)) {
+    } else if (const auto *CB = dyn_cast<CallBase>(&V)) {
       const auto &NoAliasAA = A.getAAFor<AANoAlias>(
           *this, IRPosition::callsite_returned(*CB), DepClassTy::OPTIONAL);
       if (NoAliasAA.isAssumedNoAlias())
@@ -6874,16 +6767,28 @@ void AAMemoryLocationImpl::categorizePtrValue(
     }
 
     assert(MLK != NO_LOCATIONS && "No location specified!");
-    LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Ptr value can be categorized: "
-                      << *Obj << " -> " << getMemoryLocationsAsStr(MLK)
+    updateStateAndAccessesMap(T, MLK, &I, &V, Changed,
+                              getAccessKindFromInst(&I));
+    LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Ptr value cannot be categorized: "
+                      << V << " -> " << getMemoryLocationsAsStr(T.getAssumed())
                       << "\n");
-    updateStateAndAccessesMap(getState(), MLK, &I, Obj, Changed,
+    return true;
+  };
+
+  if (!genericValueTraversal<AAMemoryLocation, AAMemoryLocation::StateType>(
+          A, IRPosition::value(Ptr), *this, State, VisitValueCB, getCtxI(),
+          /* UseValueSimplify */ true,
+          /* MaxValues */ 32, StripGEPCB)) {
+    LLVM_DEBUG(
+        dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n");
+    updateStateAndAccessesMap(State, NO_UNKOWN_MEM, &I, nullptr, Changed,
                               getAccessKindFromInst(&I));
+  } else {
+    LLVM_DEBUG(
+        dbgs()
+        << "[AAMemoryLocation] Accessed locations with pointer locations: "
+        << getMemoryLocationsAsStr(State.getAssumed()) << "\n");
   }
-
-  LLVM_DEBUG(
-      dbgs() << "[AAMemoryLocation] Accessed locations with pointer locations: "
-             << getMemoryLocationsAsStr(State.getAssumed()) << "\n");
 }
 
 void AAMemoryLocationImpl::categorizeArgumentPointerLocations(
@@ -7029,9 +6934,7 @@ struct AAMemoryLocationFunction final : public AAMemoryLocationImpl {
       return getAssumedNotAccessedLocation() != VALID_STATE;
     };
 
-    bool UsedAssumedInformation = false;
-    if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this,
-                                            UsedAssumedInformation))
+    if (!A.checkForAllReadWriteInstructions(CheckRWInst, *this))
       return indicatePessimisticFixpoint();
 
     Changed |= AssumedState != getAssumed();
@@ -7171,42 +7074,11 @@ struct AAValueConstantRangeImpl : AAValueConstantRange {
                                  const_cast<Instruction *>(CtxI));
   }
 
-  /// Return true if \p CtxI is valid for querying outside analyses.
-  /// This basically makes sure we do not ask intra-procedural analysis
-  /// about a context in the wrong function or a context that violates
-  /// dominance assumptions they might have. The \p AllowAACtxI flag indicates
-  /// if the original context of this AA is OK or should be considered invalid.
-  bool isValidCtxInstructionForOutsideAnalysis(Attributor &A,
-                                               const Instruction *CtxI,
-                                               bool AllowAACtxI) const {
-    if (!CtxI || (!AllowAACtxI && CtxI == getCtxI()))
-      return false;
-
-    // Our context might be in a 
diff erent function, neither intra-procedural
-    // analysis (ScalarEvolution nor LazyValueInfo) can handle that.
-    if (!AA::isValidInScope(getAssociatedValue(), CtxI->getFunction()))
-      return false;
-
-    // If the context is not dominated by the value there are paths to the
-    // context that do not define the value. This cannot be handled by
-    // LazyValueInfo so we need to bail.
-    if (auto *I = dyn_cast<Instruction>(&getAssociatedValue())) {
-      InformationCache &InfoCache = A.getInfoCache();
-      const DominatorTree *DT =
-          InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(
-              *I->getFunction());
-      return DT && DT->dominates(I, CtxI);
-    }
-
-    return true;
-  }
-
   /// See AAValueConstantRange::getKnownConstantRange(..).
   ConstantRange
   getKnownConstantRange(Attributor &A,
                         const Instruction *CtxI = nullptr) const override {
-    if (!isValidCtxInstructionForOutsideAnalysis(A, CtxI,
-                                                 /* AllowAACtxI */ false))
+    if (!CtxI || CtxI == getCtxI())
       return getKnown();
 
     ConstantRange LVIR = getConstantRangeFromLVI(A, CtxI);
@@ -7222,8 +7094,9 @@ struct AAValueConstantRangeImpl : AAValueConstantRange {
     //       We may be able to bound a variable range via assumptions in
     //       Attributor. ex.) If x is assumed to be in [1, 3] and y is known to
     //       evolve to x^2 + x, then we can say that y is in [2, 12].
-    if (!isValidCtxInstructionForOutsideAnalysis(A, CtxI,
-                                                 /* AllowAACtxI */ false))
+
+    if (!CtxI || CtxI == getCtxI() ||
+        !AA::isValidInScope(getAssociatedValue(), CtxI->getFunction()))
       return getAssumed();
 
     ConstantRange LVIR = getConstantRangeFromLVI(A, CtxI);
@@ -7613,9 +7486,9 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
 
     IntegerRangeState T(getBitWidth());
 
-    if (!genericValueTraversal<IntegerRangeState>(A, getIRPosition(), *this, T,
-                                                  VisitValueCB, getCtxI(),
-                                                  /* UseValueSimplify */ false))
+    if (!genericValueTraversal<AAValueConstantRange, IntegerRangeState>(
+            A, getIRPosition(), *this, T, VisitValueCB, getCtxI(),
+            /* UseValueSimplify */ false))
       return indicatePessimisticFixpoint();
 
     return clampStateAndIndicateChange(getState(), T);
@@ -8338,13 +8211,12 @@ struct AANoUndefImpl : AANoUndef {
     // We don't manifest noundef attribute for dead positions because the
     // associated values with dead positions would be replaced with undef
     // values.
-    bool UsedAssumedInformation = false;
-    if (A.isAssumedDead(getIRPosition(), nullptr, nullptr,
-                        UsedAssumedInformation))
+    if (A.isAssumedDead(getIRPosition(), nullptr, nullptr))
       return ChangeStatus::UNCHANGED;
     // A position whose simplified value does not have any value is
     // considered to be dead. We don't manifest noundef in such positions for
     // the same reason above.
+    bool UsedAssumedInformation = false;
     if (!A.getAssumedSimplified(getIRPosition(), *this, UsedAssumedInformation)
              .hasValue())
       return ChangeStatus::UNCHANGED;
@@ -8381,8 +8253,8 @@ struct AANoUndefFloating : public AANoUndefImpl {
     };
 
     StateType T;
-    if (!genericValueTraversal<StateType>(A, getIRPosition(), *this, T,
-                                          VisitValueCB, getCtxI()))
+    if (!genericValueTraversal<AANoUndef, StateType>(
+            A, getIRPosition(), *this, T, VisitValueCB, getCtxI()))
       return indicatePessimisticFixpoint();
 
     return clampStateAndIndicateChange(getState(), T);
@@ -8459,9 +8331,9 @@ struct AACallEdgesFunction : public AACallEdges {
 
     // Process any value that we might call.
     auto ProcessCalledOperand = [&](Value *V, Instruction *Ctx) {
-      if (!genericValueTraversal<bool>(A, IRPosition::value(*V), *this,
-                                       HasUnknownCallee, VisitValue, nullptr,
-                                       false))
+      if (!genericValueTraversal<AACallEdges, bool>(A, IRPosition::value(*V),
+                                                    *this, HasUnknownCallee,
+                                                    VisitValue, nullptr, false))
         // If we haven't gone through all values, assume that there are unknown
         // callees.
         HasUnknownCallee = true;
@@ -8495,9 +8367,7 @@ struct AACallEdgesFunction : public AACallEdges {
     };
 
     // Visit all callable instructions.
-    bool UsedAssumedInformation = false;
-    if (!A.checkForAllCallLikeInstructions(ProcessCallInst, *this,
-                                           UsedAssumedInformation))
+    if (!A.checkForAllCallLikeInstructions(ProcessCallInst, *this))
       // If we haven't looked at all call like instructions, assume that there
       // are unknown callees.
       HasUnknownCallee = true;

diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 71c5a3559eaa..5c4be0b8b9bd 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -25,11 +25,10 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
-#include "llvm/IR/Assumptions.h"
-#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/Instruction.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/IPO.h"
@@ -38,6 +37,7 @@
 #include "llvm/Transforms/Utils/CallGraphUpdater.h"
 #include "llvm/Transforms/Utils/CodeExtractor.h"
 
+using namespace llvm::PatternMatch;
 using namespace llvm;
 using namespace omp;
 
@@ -74,18 +74,6 @@ STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
           "Number of OpenMP runtime function uses identified");
 STATISTIC(NumOpenMPTargetRegionKernels,
           "Number of OpenMP target region entry points (=kernels) identified");
-STATISTIC(NumOpenMPTargetRegionKernelsSPMD,
-          "Number of OpenMP target region entry points (=kernels) executed in "
-          "SPMD-mode instead of generic-mode");
-STATISTIC(NumOpenMPTargetRegionKernelsWithoutStateMachine,
-          "Number of OpenMP target region entry points (=kernels) executed in "
-          "generic-mode without a state machines");
-STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback,
-          "Number of OpenMP target region entry points (=kernels) executed in "
-          "generic-mode with customized state machines with fallback");
-STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback,
-          "Number of OpenMP target region entry points (=kernels) executed in "
-          "generic-mode with customized state machines without fallback");
 STATISTIC(
     NumOpenMPParallelRegionsReplacedInGPUStateMachine,
     "Number of OpenMP parallel regions replaced with ID in GPU state machines");
@@ -244,11 +232,6 @@ struct OMPInformationCache : public InformationCache {
     /// Map from functions to all uses of this runtime function contained in
     /// them.
     DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
-
-  public:
-    /// Iterators for the uses of this runtime function.
-    decltype(UsesMap)::iterator begin() { return UsesMap.begin(); }
-    decltype(UsesMap)::iterator end() { return UsesMap.end(); }
   };
 
   /// An OpenMP-IR-Builder instance
@@ -259,9 +242,6 @@ struct OMPInformationCache : public InformationCache {
                   RuntimeFunction::OMPRTL___last>
       RFIs;
 
-  /// Map from function declarations/definitions to their runtime enum type.
-  DenseMap<Function *, RuntimeFunction> RuntimeFunctionIDMap;
-
   /// Map from ICV kind to the ICV description.
   EnumeratedArray<InternalControlVarInfo, InternalControlVar,
                   InternalControlVar::ICV___last>
@@ -404,7 +384,6 @@ struct OMPInformationCache : public InformationCache {
     SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
     Function *F = M.getFunction(_Name);                                        \
     if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) {           \
-      RuntimeFunctionIDMap[F] = _Enum;                                         \
       auto &RFI = RFIs[_Enum];                                                 \
       RFI.Kind = _Enum;                                                        \
       RFI.Name = _Name;                                                        \
@@ -433,149 +412,6 @@ struct OMPInformationCache : public InformationCache {
   SmallPtrSetImpl<Kernel> &Kernels;
 };
 
-template <typename Ty, bool InsertInvalidates = true>
-struct BooleanStateWithPtrSetVector : public BooleanState {
-
-  bool contains(Ty *Elem) const { return Set.contains(Elem); }
-  bool insert(Ty *Elem) {
-    if (InsertInvalidates)
-      BooleanState::indicatePessimisticFixpoint();
-    return Set.insert(Elem);
-  }
-
-  Ty *operator[](int Idx) const { return Set[Idx]; }
-  bool operator==(const BooleanStateWithPtrSetVector &RHS) const {
-    return BooleanState::operator==(RHS) && Set == RHS.Set;
-  }
-  bool operator!=(const BooleanStateWithPtrSetVector &RHS) const {
-    return !(*this == RHS);
-  }
-
-  bool empty() const { return Set.empty(); }
-  size_t size() const { return Set.size(); }
-
-  /// "Clamp" this state with \p RHS.
-  BooleanStateWithPtrSetVector &
-  operator^=(const BooleanStateWithPtrSetVector &RHS) {
-    BooleanState::operator^=(RHS);
-    Set.insert(RHS.Set.begin(), RHS.Set.end());
-    return *this;
-  }
-
-private:
-  /// A set to keep track of elements.
-  SetVector<Ty *> Set;
-
-public:
-  typename decltype(Set)::iterator begin() { return Set.begin(); }
-  typename decltype(Set)::iterator end() { return Set.end(); }
-  typename decltype(Set)::const_iterator begin() const { return Set.begin(); }
-  typename decltype(Set)::const_iterator end() const { return Set.end(); }
-};
-
-struct KernelInfoState : AbstractState {
-  /// Flag to track if we reached a fixpoint.
-  bool IsAtFixpoint = false;
-
-  /// The parallel regions (identified by the outlined parallel functions) that
-  /// can be reached from the associated function.
-  BooleanStateWithPtrSetVector<Function, /* InsertInvalidates */ false>
-      ReachedKnownParallelRegions;
-
-  /// State to track what parallel region we might reach.
-  BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions;
-
-  /// State to track if we are in SPMD-mode, assumed or know, and why we decided
-  /// we cannot be.
-  BooleanStateWithPtrSetVector<Instruction> SPMDCompatibilityTracker;
-
-  /// The __kmpc_target_init call in this kernel, if any. If we find more than
-  /// one we abort as the kernel is malformed.
-  CallBase *KernelInitCB = nullptr;
-
-  /// The __kmpc_target_deinit call in this kernel, if any. If we find more than
-  /// one we abort as the kernel is malformed.
-  CallBase *KernelDeinitCB = nullptr;
-
-  /// Abstract State interface
-  ///{
-
-  KernelInfoState() {}
-  KernelInfoState(bool BestState) {
-    if (!BestState)
-      indicatePessimisticFixpoint();
-  }
-
-  /// See AbstractState::isValidState(...)
-  bool isValidState() const override { return true; }
-
-  /// See AbstractState::isAtFixpoint(...)
-  bool isAtFixpoint() const override { return IsAtFixpoint; }
-
-  /// See AbstractState::indicatePessimisticFixpoint(...)
-  ChangeStatus indicatePessimisticFixpoint() override {
-    IsAtFixpoint = true;
-    SPMDCompatibilityTracker.indicatePessimisticFixpoint();
-    ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
-    return ChangeStatus::CHANGED;
-  }
-
-  /// See AbstractState::indicateOptimisticFixpoint(...)
-  ChangeStatus indicateOptimisticFixpoint() override {
-    IsAtFixpoint = true;
-    return ChangeStatus::UNCHANGED;
-  }
-
-  /// Return the assumed state
-  KernelInfoState &getAssumed() { return *this; }
-  const KernelInfoState &getAssumed() const { return *this; }
-
-  bool operator==(const KernelInfoState &RHS) const {
-    if (SPMDCompatibilityTracker != RHS.SPMDCompatibilityTracker)
-      return false;
-    if (ReachedKnownParallelRegions != RHS.ReachedKnownParallelRegions)
-      return false;
-    if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions)
-      return false;
-    return true;
-  }
-
-  /// Return empty set as the best state of potential values.
-  static KernelInfoState getBestState() { return KernelInfoState(true); }
-
-  static KernelInfoState getBestState(KernelInfoState &KIS) {
-    return getBestState();
-  }
-
-  /// Return full set as the worst state of potential values.
-  static KernelInfoState getWorstState() { return KernelInfoState(false); }
-
-  /// "Clamp" this state with \p KIS.
-  KernelInfoState operator^=(const KernelInfoState &KIS) {
-    // Do not merge two 
diff erent _init and _deinit call sites.
-    if (KIS.KernelInitCB) {
-      if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
-        indicatePessimisticFixpoint();
-      KernelInitCB = KIS.KernelInitCB;
-    }
-    if (KIS.KernelDeinitCB) {
-      if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
-        indicatePessimisticFixpoint();
-      KernelDeinitCB = KIS.KernelDeinitCB;
-    }
-    SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
-    ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions;
-    ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions;
-    return *this;
-  }
-
-  KernelInfoState operator&=(const KernelInfoState &KIS) {
-    return (*this ^= KIS);
-  }
-
-  ///}
-};
-
 /// Used to map the values physically (in the IR) stored in an offload
 /// array, to a vector in memory.
 struct OffloadArray {
@@ -690,7 +526,7 @@ struct OpenMPOpt {
                       << OMPInfoCache.ModuleSlice.size() << " functions\n");
 
     if (IsModulePass) {
-      Changed |= runAttributor(IsModulePass);
+      Changed |= runAttributor();
 
       // Recollect uses, in case Attributor deleted any.
       OMPInfoCache.recollectUses();
@@ -703,14 +539,14 @@ struct OpenMPOpt {
       if (PrintOpenMPKernels)
         printKernels();
 
-      Changed |= runAttributor(IsModulePass);
+      Changed |= rewriteDeviceCodeStateMachine();
+
+      Changed |= runAttributor();
 
       // Recollect uses, in case Attributor deleted any.
       OMPInfoCache.recollectUses();
 
       Changed |= deleteParallelRegions();
-      Changed |= rewriteDeviceCodeStateMachine();
-
       if (HideMemoryTransferLatency)
         Changed |= hideMemTransfersLatency();
       Changed |= deduplicateRuntimeCalls();
@@ -1741,11 +1577,11 @@ struct OpenMPOpt {
   Attributor &A;
 
   /// Helper function to run Attributor on SCC.
-  bool runAttributor(bool IsModulePass) {
+  bool runAttributor() {
     if (SCC.empty())
       return false;
 
-    registerAAs(IsModulePass);
+    registerAAs();
 
     ChangeStatus Changed = A.run();
 
@@ -1757,7 +1593,46 @@ struct OpenMPOpt {
 
   /// Populate the Attributor with abstract attribute opportunities in the
   /// function.
-  void registerAAs(bool IsModulePass);
+  void registerAAs() {
+    if (SCC.empty())
+      return;
+
+    // Create CallSite AA for all Getters.
+    for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
+      auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
+
+      auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
+
+      auto CreateAA = [&](Use &U, Function &Caller) {
+        CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
+        if (!CI)
+          return false;
+
+        auto &CB = cast<CallBase>(*CI);
+
+        IRPosition CBPos = IRPosition::callsite_function(CB);
+        A.getOrCreateAAFor<AAICVTracker>(CBPos);
+        return false;
+      };
+
+      GetterRFI.foreachUse(SCC, CreateAA);
+    }
+    auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
+    auto CreateAA = [&](Use &U, Function &F) {
+      A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
+      return false;
+    };
+    GlobalizationRFI.foreachUse(SCC, CreateAA);
+
+    // Create an ExecutionDomain AA for every function and a HeapToStack AA for
+    // every function if there is a device kernel.
+    for (auto *F : SCC) {
+      if (!F->isDeclaration())
+        A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
+      if (isOpenMPDevice(M))
+        A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
+    }
+  }
 };
 
 Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
@@ -1895,7 +1770,7 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
     // TODO: Checking the number of uses is not a necessary restriction and
     // should be lifted.
     if (UnknownUse || NumDirectCalls != 1 ||
-        ToBeReplacedStateMachineUses.size() > 2) {
+        ToBeReplacedStateMachineUses.size() != 2) {
       {
         auto Remark = [&](OptimizationRemarkAnalysis ORA) {
           return ORA << "Parallel region is used in "
@@ -2078,9 +1953,7 @@ struct AAICVTrackerFunction : public AAICVTracker {
       // Track all changes of an ICV.
       SetterRFI.foreachUse(TrackValues, F);
 
-      bool UsedAssumedInformation = false;
       A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
-                                UsedAssumedInformation,
                                 /* CheckBBLivenessOnly */ true);
 
       /// TODO: Figure out a way to avoid adding entry in
@@ -2263,9 +2136,7 @@ struct AAICVTrackerFunctionReturned : AAICVTracker {
         return true;
       };
 
-      bool UsedAssumedInformation = false;
       if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
-                                     UsedAssumedInformation,
                                      /* CheckBBLivenessOnly */ true))
         UniqueICVValue = nullptr;
 
@@ -2470,12 +2341,10 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
                               AllCallSitesKnown))
     SingleThreadedBBs.erase(&F->getEntryBlock());
 
-  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
-  auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
-
-  // Check if the edge into the successor block compares the __kmpc_target_init
-  // result with -1. If we are in non-SPMD-mode that signals only the main
-  // thread will execute the edge.
+  // Check if the edge into the successor block compares a thread-id function to
+  // a constant zero.
+  // TODO: Use AAValueSimplify to simplify and propogate constants.
+  // TODO: Check more than a single use for thread ID's.
   auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
     if (!Edge || !Edge->isConditional())
       return false;
@@ -2486,20 +2355,31 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
     if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality())
       return false;
 
+    // Temporarily match the pattern generated by clang for teams regions.
+    // TODO: Remove this once the new runtime is in place.
+    ConstantInt *One, *NegOne;
+    CmpInst::Predicate Pred;
+    auto &&m_ThreadID = m_Intrinsic<Intrinsic::nvvm_read_ptx_sreg_tid_x>();
+    auto &&m_WarpSize = m_Intrinsic<Intrinsic::nvvm_read_ptx_sreg_warpsize>();
+    auto &&m_BlockSize = m_Intrinsic<Intrinsic::nvvm_read_ptx_sreg_ntid_x>();
+    if (match(Cmp, m_Cmp(Pred, m_ThreadID,
+                         m_And(m_Sub(m_BlockSize, m_ConstantInt(One)),
+                               m_Xor(m_Sub(m_WarpSize, m_ConstantInt(One)),
+                                     m_ConstantInt(NegOne))))))
+      if (One->isOne() && NegOne->isMinusOne() &&
+          Pred == CmpInst::Predicate::ICMP_EQ)
+        return true;
+
     ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
-    if (!C)
+    if (!C || !C->isZero())
       return false;
 
-    // Match:  -1 == __kmpc_target_init (for non-SPMD kernels only!)
-    if (C->isAllOnesValue()) {
-      auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
-      if (!CB || CB->getCalledFunction() != RFI.Declaration)
-        return false;
-      const int InitIsSPMDArgNo = 1;
-      auto *IsSPMDModeCI =
-          dyn_cast<ConstantInt>(CB->getOperand(InitIsSPMDArgNo));
-      return IsSPMDModeCI && IsSPMDModeCI->isZero();
-    }
+    if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
+      if (II->getIntrinsicID() == Intrinsic::nvvm_read_ptx_sreg_tid_x)
+        return true;
+    if (auto *II = dyn_cast<IntrinsicInst>(Cmp->getOperand(0)))
+      if (II->getIntrinsicID() == Intrinsic::amdgcn_workitem_id_x)
+        return true;
 
     return false;
   };
@@ -2514,7 +2394,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
     for (auto PredBB = pred_begin(BB), PredEndBB = pred_end(BB);
          PredBB != PredEndBB; ++PredBB) {
       if (!IsInitialThreadOnly(dyn_cast<BranchInst>((*PredBB)->getTerminator()),
-                               BB))
+                              BB))
         IsInitialThread &= SingleThreadedBBs.contains(*PredBB);
     }
 
@@ -2592,7 +2472,7 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
     for (CallBase *CB : MallocCalls) {
       // Skip replacing this if HeapToStack has already claimed it.
-      if (HS && HS->isAssumedHeapToStack(*CB))
+      if (HS && HS->isKnownHeapToStack(*CB))
         continue;
 
       // Find the unique free call to remove it.
@@ -2674,766 +2554,9 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
   SmallPtrSet<CallBase *, 4> MallocCalls;
 };
 
-struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
-  using Base = StateWrapper<KernelInfoState, AbstractAttribute>;
-  AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
-
-  /// Statistics are tracked as part of manifest for now.
-  void trackStatistics() const override {}
-
-  /// See AbstractAttribute::getAsStr()
-  const std::string getAsStr() const override {
-    if (!isValidState())
-      return "<invalid>";
-    return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD"
-                                                            : "generic") +
-           std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
-                                                               : "") +
-           std::string(" #PRs: ") +
-           std::to_string(ReachedKnownParallelRegions.size()) +
-           ", #Unknown PRs: " +
-           std::to_string(ReachedUnknownParallelRegions.size());
-  }
-
-  /// Create an abstract attribute biew for the position \p IRP.
-  static AAKernelInfo &createForPosition(const IRPosition &IRP, Attributor &A);
-
-  /// See AbstractAttribute::getName()
-  const std::string getName() const override { return "AAKernelInfo"; }
-
-  /// See AbstractAttribute::getIdAddr()
-  const char *getIdAddr() const override { return &ID; }
-
-  /// This function should return true if the type of the \p AA is AAKernelInfo
-  static bool classof(const AbstractAttribute *AA) {
-    return (AA->getIdAddr() == &ID);
-  }
-
-  static const char ID;
-};
-
-/// The function kernel info abstract attribute, basically, what can we say
-/// about a function with regards to the KernelInfoState.
-struct AAKernelInfoFunction : AAKernelInfo {
-  AAKernelInfoFunction(const IRPosition &IRP, Attributor &A)
-      : AAKernelInfo(IRP, A) {}
-
-  /// See AbstractAttribute::initialize(...).
-  void initialize(Attributor &A) override {
-    // This is a high-level transform that might change the constant arguments
-    // of the init and dinit calls. We need to tell the Attributor about this
-    // to avoid other parts using the current constant value for simpliication.
-    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
-
-    Function *Fn = getAnchorScope();
-    if (!OMPInfoCache.Kernels.count(Fn))
-      return;
-
-    OMPInformationCache::RuntimeFunctionInfo &InitRFI =
-        OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
-    OMPInformationCache::RuntimeFunctionInfo &DeinitRFI =
-        OMPInfoCache.RFIs[OMPRTL___kmpc_target_deinit];
-
-    // For kernels we perform more initialization work, first we find the init
-    // and deinit calls.
-    auto StoreCallBase = [](Use &U,
-                            OMPInformationCache::RuntimeFunctionInfo &RFI,
-                            CallBase *&Storage) {
-      CallBase *CB = OpenMPOpt::getCallIfRegularCall(U, &RFI);
-      assert(CB &&
-             "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!");
-      assert(!Storage &&
-             "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!");
-      Storage = CB;
-      return false;
-    };
-    InitRFI.foreachUse(
-        [&](Use &U, Function &) {
-          StoreCallBase(U, InitRFI, KernelInitCB);
-          return false;
-        },
-        Fn);
-    DeinitRFI.foreachUse(
-        [&](Use &U, Function &) {
-          StoreCallBase(U, DeinitRFI, KernelDeinitCB);
-          return false;
-        },
-        Fn);
-
-    assert((KernelInitCB && KernelDeinitCB) &&
-           "Kernel without __kmpc_target_init or __kmpc_target_deinit!");
-
-    // For kernels we might need to initialize/finalize the IsSPMD state and
-    // we need to register a simplification callback so that the Attributor
-    // knows the constant arguments to __kmpc_target_init and
-    // __kmpc_target_deinit might actually change.
-
-    Attributor::SimplifictionCallbackTy StateMachineSimplifyCB =
-        [&](const IRPosition &IRP, const AbstractAttribute *AA,
-            bool &UsedAssumedInformation) -> Optional<Value *> {
-      // IRP represents the "use generic state machine" argument of an
-      // __kmpc_target_init call. We will answer this one with the internal
-      // state. As long as we are not in an invalid state, we will create a
-      // custom state machine so the value should be a `i1 false`. If we are
-      // in an invalid state, we won't change the value that is in the IR.
-      if (!isValidState())
-        return nullptr;
-      if (AA)
-        A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
-      UsedAssumedInformation = !isAtFixpoint();
-      auto *FalseVal =
-          ConstantInt::getBool(IRP.getAnchorValue().getContext(), 0);
-      return FalseVal;
-    };
-
-    Attributor::SimplifictionCallbackTy IsSPMDModeSimplifyCB =
-        [&](const IRPosition &IRP, const AbstractAttribute *AA,
-            bool &UsedAssumedInformation) -> Optional<Value *> {
-      // IRP represents the "SPMDCompatibilityTracker" argument of an
-      // __kmpc_target_init or
-      // __kmpc_target_deinit call. We will answer this one with the internal
-      // state.
-      if (!isValidState())
-        return nullptr;
-      if (!SPMDCompatibilityTracker.isAtFixpoint()) {
-        if (AA)
-          A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
-        UsedAssumedInformation = true;
-      } else {
-        UsedAssumedInformation = false;
-      }
-      auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
-                                       SPMDCompatibilityTracker.isAssumed());
-      return Val;
-    };
-
-    constexpr const int InitIsSPMDArgNo = 1;
-    constexpr const int DeinitIsSPMDArgNo = 1;
-    constexpr const int InitUseStateMachineArgNo = 2;
-    A.registerSimplificationCallback(
-        IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
-        StateMachineSimplifyCB);
-    A.registerSimplificationCallback(
-        IRPosition::callsite_argument(*KernelInitCB, InitIsSPMDArgNo),
-        IsSPMDModeSimplifyCB);
-    A.registerSimplificationCallback(
-        IRPosition::callsite_argument(*KernelDeinitCB, DeinitIsSPMDArgNo),
-        IsSPMDModeSimplifyCB);
-
-    // Check if we know we are in SPMD-mode already.
-    ConstantInt *IsSPMDArg =
-        dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo));
-    if (IsSPMDArg && !IsSPMDArg->isZero())
-      SPMDCompatibilityTracker.indicateOptimisticFixpoint();
-  }
-
-  /// Modify the IR based on the KernelInfoState as the fixpoint iteration is
-  /// finished now.
-  ChangeStatus manifest(Attributor &A) override {
-    // If we are not looking at a kernel with __kmpc_target_init and
-    // __kmpc_target_deinit call we cannot actually manifest the information.
-    if (!KernelInitCB || !KernelDeinitCB)
-      return ChangeStatus::UNCHANGED;
-
-    // Known SPMD-mode kernels need no manifest changes.
-    if (SPMDCompatibilityTracker.isKnown())
-      return ChangeStatus::UNCHANGED;
-
-    // If we can we change the execution mode to SPMD-mode otherwise we build a
-    // custom state machine.
-    if (!changeToSPMDMode(A))
-      buildCustomStateMachine(A);
-
-    return ChangeStatus::CHANGED;
-  }
-
-  bool changeToSPMDMode(Attributor &A) {
-    if (!SPMDCompatibilityTracker.isAssumed()) {
-      for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
-        if (!NonCompatibleI)
-          continue;
-        auto Remark = [&](OptimizationRemarkAnalysis ORA) {
-          ORA << "Kernel will be executed in generic-mode due to this "
-                 "potential side-effect";
-          if (auto *CI = dyn_cast<CallBase>(NonCompatibleI)) {
-            if (Function *F = CI->getCalledFunction())
-              ORA << ", consider to add "
-                     "`__attribute__((assume(\"ompx_spmd_amenable\"))`"
-                     " to the called function '"
-                  << F->getName() << "'";
-          }
-          return ORA << ".";
-        };
-        A.emitRemark<OptimizationRemarkAnalysis>(
-            NonCompatibleI, "OpenMPKernelNonSPMDMode", Remark);
-
-        LLVM_DEBUG(dbgs() << TAG << "SPMD-incompatible side-effect: "
-                          << *NonCompatibleI << "\n");
-      }
-
-      return false;
-    }
-
-    // Adjust the global exec mode flag that tells the runtime what mode this
-    // kernel is executed in.
-    Function *Kernel = getAnchorScope();
-    GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
-        (Kernel->getName() + "_exec_mode").str());
-    assert(ExecMode && "Kernel without exec mode?");
-    assert(ExecMode->getInitializer() &&
-           ExecMode->getInitializer()->isOneValue() &&
-           "Initially non-SPMD kernel has SPMD exec mode!");
-    ExecMode->setInitializer(
-        ConstantInt::get(ExecMode->getInitializer()->getType(), 0));
-
-    // Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
-    const int InitIsSPMDArgNo = 1;
-    const int DeinitIsSPMDArgNo = 1;
-    const int InitUseStateMachineArgNo = 2;
-
-    auto &Ctx = getAnchorValue().getContext();
-    A.changeUseAfterManifest(KernelInitCB->getArgOperandUse(InitIsSPMDArgNo),
-                             *ConstantInt::getBool(Ctx, 1));
-    A.changeUseAfterManifest(
-        KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo),
-        *ConstantInt::getBool(Ctx, 0));
-    A.changeUseAfterManifest(
-        KernelDeinitCB->getArgOperandUse(DeinitIsSPMDArgNo),
-        *ConstantInt::getBool(Ctx, 1));
-    ++NumOpenMPTargetRegionKernelsSPMD;
-
-    auto Remark = [&](OptimizationRemark OR) {
-      return OR << "Generic-mode kernel is changed to SPMD-mode.";
-    };
-    A.emitRemark<OptimizationRemark>(KernelInitCB, "OpenMPKernelSPMDMode",
-                                     Remark);
-    return true;
-  };
-
-  ChangeStatus buildCustomStateMachine(Attributor &A) {
-    assert(ReachedKnownParallelRegions.isValidState() &&
-           "Custom state machine with invalid parallel region states?");
-
-    const int InitIsSPMDArgNo = 1;
-    const int InitUseStateMachineArgNo = 2;
-
-    // Check if the current configuration is non-SPMD and generic state machine.
-    // If we already have SPMD mode or a custom state machine we do not need to
-    // go any further. If it is anything but a constant something is weird and
-    // we give up.
-    ConstantInt *UseStateMachine = dyn_cast<ConstantInt>(
-        KernelInitCB->getArgOperand(InitUseStateMachineArgNo));
-    ConstantInt *IsSPMD =
-        dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo));
-
-    // If we are stuck with generic mode, try to create a custom device (=GPU)
-    // state machine which is specialized for the parallel regions that are
-    // reachable by the kernel.
-    if (!UseStateMachine || UseStateMachine->isZero() || !IsSPMD ||
-        !IsSPMD->isZero())
-      return ChangeStatus::UNCHANGED;
-
-    // If not SPMD mode, indicate we use a custom state machine now.
-    auto &Ctx = getAnchorValue().getContext();
-    auto *FalseVal = ConstantInt::getBool(Ctx, 0);
-    A.changeUseAfterManifest(
-        KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal);
-
-    // If we don't actually need a state machine we are done here. This can
-    // happen if there simply are no parallel regions. In the resulting kernel
-    // all worker threads will simply exit right away, leaving the main thread
-    // to do the work alone.
-    if (ReachedKnownParallelRegions.empty() &&
-        ReachedUnknownParallelRegions.empty()) {
-      ++NumOpenMPTargetRegionKernelsWithoutStateMachine;
-
-      auto Remark = [&](OptimizationRemark OR) {
-        return OR << "Generic-mode kernel is executed without state machine "
-                     "(good)";
-      };
-      A.emitRemark<OptimizationRemark>(
-          KernelInitCB, "OpenMPKernelWithoutStateMachine", Remark);
-
-      return ChangeStatus::CHANGED;
-    }
-
-    // Keep track in the statistics of our new shiny custom state machine.
-    if (ReachedUnknownParallelRegions.empty()) {
-      ++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback;
-
-      auto Remark = [&](OptimizationRemark OR) {
-        return OR << "Generic-mode kernel is executed with a customized state "
-                     "machine ["
-                  << ore::NV("ParallelRegions",
-                             ReachedKnownParallelRegions.size())
-                  << " known parallel regions] (good).";
-      };
-      A.emitRemark<OptimizationRemark>(
-          KernelInitCB, "OpenMPKernelWithCustomizedStateMachine", Remark);
-    } else {
-      ++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback;
-
-      auto Remark = [&](OptimizationRemark OR) {
-        return OR << "Generic-mode kernel is executed with a customized state "
-                     "machine that requires a fallback ["
-                  << ore::NV("ParallelRegions",
-                             ReachedKnownParallelRegions.size())
-                  << " known parallel regions, "
-                  << ore::NV("UnknownParallelRegions",
-                             ReachedUnknownParallelRegions.size())
-                  << " unkown parallel regions] (bad).";
-      };
-      A.emitRemark<OptimizationRemark>(
-          KernelInitCB, "OpenMPKernelWithCustomizedStateMachineAndFallback",
-          Remark);
-
-      // Tell the user why we ended up with a fallback.
-      for (CallBase *UnknownParallelRegionCB : ReachedUnknownParallelRegions) {
-        if (!UnknownParallelRegionCB)
-          continue;
-        auto Remark = [&](OptimizationRemarkAnalysis ORA) {
-          return ORA
-                 << "State machine fallback caused by this call. If it is a "
-                    "false positive, use "
-                    "`__attribute__((assume(\"omp_no_openmp\"))` "
-                    "(or \"omp_no_parallelism\").";
-        };
-        A.emitRemark<OptimizationRemarkAnalysis>(
-            UnknownParallelRegionCB,
-            "OpenMPKernelWithCustomizedStateMachineAndFallback", Remark);
-      }
-    }
-
-    // Create all the blocks:
-    //
-    //                       InitCB = __kmpc_target_init(...)
-    //                       bool IsWorker = InitCB >= 0;
-    //                       if (IsWorker) {
-    // SMBeginBB:               __kmpc_barrier_simple_spmd(...);
-    //                         void *WorkFn;
-    //                         bool Active = __kmpc_kernel_parallel(&WorkFn);
-    //                         if (!WorkFn) return;
-    // SMIsActiveCheckBB:       if (Active) {
-    // SMIfCascadeCurrentBB:      if      (WorkFn == <ParFn0>)
-    //                              ParFn0(...);
-    // SMIfCascadeCurrentBB:      else if (WorkFn == <ParFn1>)
-    //                              ParFn1(...);
-    //                            ...
-    // SMIfCascadeCurrentBB:      else
-    //                              ((WorkFnTy*)WorkFn)(...);
-    // SMEndParallelBB:           __kmpc_kernel_end_parallel(...);
-    //                          }
-    // SMDoneBB:                __kmpc_barrier_simple_spmd(...);
-    //                          goto SMBeginBB;
-    //                       }
-    // UserCodeEntryBB:      // user code
-    //                       __kmpc_target_deinit(...)
-    //
-    Function *Kernel = getAssociatedFunction();
-    assert(Kernel && "Expected an associated function!");
-
-    BasicBlock *InitBB = KernelInitCB->getParent();
-    BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock(
-        KernelInitCB->getNextNode(), "thread.user_code.check");
-    BasicBlock *StateMachineBeginBB = BasicBlock::Create(
-        Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB);
-    BasicBlock *StateMachineFinishedBB = BasicBlock::Create(
-        Ctx, "worker_state_machine.finished", Kernel, UserCodeEntryBB);
-    BasicBlock *StateMachineIsActiveCheckBB = BasicBlock::Create(
-        Ctx, "worker_state_machine.is_active.check", Kernel, UserCodeEntryBB);
-    BasicBlock *StateMachineIfCascadeCurrentBB =
-        BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
-                           Kernel, UserCodeEntryBB);
-    BasicBlock *StateMachineEndParallelBB =
-        BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.end",
-                           Kernel, UserCodeEntryBB);
-    BasicBlock *StateMachineDoneBarrierBB = BasicBlock::Create(
-        Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB);
-
-    const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
-    ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
-
-    InitBB->getTerminator()->eraseFromParent();
-    Instruction *IsWorker =
-        ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
-                         ConstantInt::get(KernelInitCB->getType(), -1),
-                         "thread.is_worker", InitBB);
-    IsWorker->setDebugLoc(DLoc);
-    BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, InitBB);
-
-    // Create local storage for the work function pointer.
-    Type *VoidPtrTy = Type::getInt8PtrTy(Ctx);
-    AllocaInst *WorkFnAI = new AllocaInst(VoidPtrTy, 0, "worker.work_fn.addr",
-                                          &Kernel->getEntryBlock().front());
-    WorkFnAI->setDebugLoc(DLoc);
-
-    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
-    OMPInfoCache.OMPBuilder.updateToLocation(
-        OpenMPIRBuilder::LocationDescription(
-            IRBuilder<>::InsertPoint(StateMachineBeginBB,
-                                     StateMachineBeginBB->end()),
-            DLoc));
-
-    Value *Ident = KernelInitCB->getArgOperand(0);
-    Value *GTid = KernelInitCB;
-
-    Module &M = *Kernel->getParent();
-    FunctionCallee BarrierFn =
-        OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
-            M, OMPRTL___kmpc_barrier_simple_spmd);
-    CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB)
-        ->setDebugLoc(DLoc);
-
-    FunctionCallee KernelParallelFn =
-        OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
-            M, OMPRTL___kmpc_kernel_parallel);
-    Instruction *IsActiveWorker = CallInst::Create(
-        KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB);
-    IsActiveWorker->setDebugLoc(DLoc);
-    Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn",
-                                       StateMachineBeginBB);
-    WorkFn->setDebugLoc(DLoc);
-
-    FunctionType *ParallelRegionFnTy = FunctionType::get(
-        Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)},
-        false);
-    Value *WorkFnCast = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
-        WorkFn, ParallelRegionFnTy->getPointerTo(), "worker.work_fn.addr_cast",
-        StateMachineBeginBB);
-
-    Instruction *IsDone =
-        ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn,
-                         Constant::getNullValue(VoidPtrTy), "worker.is_done",
-                         StateMachineBeginBB);
-    IsDone->setDebugLoc(DLoc);
-    BranchInst::Create(StateMachineFinishedBB, StateMachineIsActiveCheckBB,
-                       IsDone, StateMachineBeginBB)
-        ->setDebugLoc(DLoc);
-
-    BranchInst::Create(StateMachineIfCascadeCurrentBB,
-                       StateMachineDoneBarrierBB, IsActiveWorker,
-                       StateMachineIsActiveCheckBB)
-        ->setDebugLoc(DLoc);
-
-    Value *ZeroArg =
-        Constant::getNullValue(ParallelRegionFnTy->getParamType(0));
-
-    // Now that we have most of the CFG skeleton it is time for the if-cascade
-    // that checks the function pointer we got from the runtime against the
-    // parallel regions we expect, if there are any.
-    for (int i = 0, e = ReachedKnownParallelRegions.size(); i < e; ++i) {
-      auto *ParallelRegion = ReachedKnownParallelRegions[i];
-      BasicBlock *PRExecuteBB = BasicBlock::Create(
-          Ctx, "worker_state_machine.parallel_region.execute", Kernel,
-          StateMachineEndParallelBB);
-      CallInst::Create(ParallelRegion, {ZeroArg, GTid}, "", PRExecuteBB)
-          ->setDebugLoc(DLoc);
-      BranchInst::Create(StateMachineEndParallelBB, PRExecuteBB)
-          ->setDebugLoc(DLoc);
-
-      BasicBlock *PRNextBB =
-          BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
-                             Kernel, StateMachineEndParallelBB);
-
-      // Check if we need to compare the pointer at all or if we can just
-      // call the parallel region function.
-      Value *IsPR;
-      if (i + 1 < e || !ReachedUnknownParallelRegions.empty()) {
-        Instruction *CmpI = ICmpInst::Create(
-            ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion,
-            "worker.check_parallel_region", StateMachineIfCascadeCurrentBB);
-        CmpI->setDebugLoc(DLoc);
-        IsPR = CmpI;
-      } else {
-        IsPR = ConstantInt::getTrue(Ctx);
-      }
-
-      BranchInst::Create(PRExecuteBB, PRNextBB, IsPR,
-                         StateMachineIfCascadeCurrentBB)
-          ->setDebugLoc(DLoc);
-      StateMachineIfCascadeCurrentBB = PRNextBB;
-    }
-
-    // At the end of the if-cascade we place the indirect function pointer call
-    // in case we might need it, that is if there can be parallel regions we
-    // have not handled in the if-cascade above.
-    if (!ReachedUnknownParallelRegions.empty()) {
-      StateMachineIfCascadeCurrentBB->setName(
-          "worker_state_machine.parallel_region.fallback.execute");
-      CallInst::Create(ParallelRegionFnTy, WorkFnCast, {ZeroArg, GTid}, "",
-                       StateMachineIfCascadeCurrentBB)
-          ->setDebugLoc(DLoc);
-    }
-    BranchInst::Create(StateMachineEndParallelBB,
-                       StateMachineIfCascadeCurrentBB)
-        ->setDebugLoc(DLoc);
-
-    CallInst::Create(OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
-                         M, OMPRTL___kmpc_kernel_end_parallel),
-                     {}, "", StateMachineEndParallelBB)
-        ->setDebugLoc(DLoc);
-    BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB)
-        ->setDebugLoc(DLoc);
-
-    CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineDoneBarrierBB)
-        ->setDebugLoc(DLoc);
-    BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB)
-        ->setDebugLoc(DLoc);
-
-    return ChangeStatus::CHANGED;
-  }
-
-  /// Fixpoint iteration update function. Will be called every time a dependence
-  /// changed its state (and in the beginning).
-  ChangeStatus updateImpl(Attributor &A) override {
-    KernelInfoState StateBefore = getState();
-
-    // Callback to check a read/write instruction.
-    auto CheckRWInst = [&](Instruction &I) {
-      // We handle calls later.
-      if (isa<CallBase>(I))
-        return true;
-      // We only care about write effects.
-      if (!I.mayWriteToMemory())
-        return true;
-      if (auto *SI = dyn_cast<StoreInst>(&I)) {
-        SmallVector<const Value *> Objects;
-        getUnderlyingObjects(SI->getPointerOperand(), Objects);
-        if (llvm::all_of(Objects,
-                         [](const Value *Obj) { return isa<AllocaInst>(Obj); }))
-          return true;
-      }
-      // For now we give up on everything but stores.
-      SPMDCompatibilityTracker.insert(&I);
-      return true;
-    };
-
-    bool UsedAssumedInformationInCheckRWInst = false;
-    if (!A.checkForAllReadWriteInstructions(
-            CheckRWInst, *this, UsedAssumedInformationInCheckRWInst))
-      SPMDCompatibilityTracker.indicatePessimisticFixpoint();
-
-    // Callback to check a call instruction.
-    auto CheckCallInst = [&](Instruction &I) {
-      auto &CB = cast<CallBase>(I);
-      auto &CBAA = A.getAAFor<AAKernelInfo>(
-          *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
-      if (CBAA.getState().isValidState())
-        getState() ^= CBAA.getState();
-      return true;
-    };
-
-    bool UsedAssumedInformationInCheckCallInst = false;
-    if (!A.checkForAllCallLikeInstructions(
-            CheckCallInst, *this, UsedAssumedInformationInCheckCallInst))
-      return indicatePessimisticFixpoint();
-
-    return StateBefore == getState() ? ChangeStatus::UNCHANGED
-                                     : ChangeStatus::CHANGED;
-  }
-};
-
-/// The call site kernel info abstract attribute, basically, what can we say
-/// about a call site with regards to the KernelInfoState. For now this simply
-/// forwards the information from the callee.
-struct AAKernelInfoCallSite : AAKernelInfo {
-  AAKernelInfoCallSite(const IRPosition &IRP, Attributor &A)
-      : AAKernelInfo(IRP, A) {}
-
-  /// See AbstractAttribute::initialize(...).
-  void initialize(Attributor &A) override {
-    AAKernelInfo::initialize(A);
-
-    CallBase &CB = cast<CallBase>(getAssociatedValue());
-    Function *Callee = getAssociatedFunction();
-
-    // Helper to lookup an assumption string.
-    auto HasAssumption = [](Function *Fn, StringRef AssumptionStr) {
-      return Fn && hasAssumption(*Fn, AssumptionStr);
-    };
-
-    // Check for SPMD-mode assumptions.
-    if (HasAssumption(Callee, "ompx_spmd_amenable"))
-      SPMDCompatibilityTracker.indicateOptimisticFixpoint();
-
-    // First weed out calls we do not care about, that is readonly/readnone
-    // calls, intrinsics, and "no_openmp" calls. Neither of these can reach a
-    // parallel region or anything else we are looking for.
-    if (!CB.mayWriteToMemory() || isa<IntrinsicInst>(CB)) {
-      indicateOptimisticFixpoint();
-      return;
-    }
-
-    // Next we check if we know the callee. If it is a known OpenMP function
-    // we will handle them explicitly in the switch below. If it is not, we
-    // will use an AAKernelInfo object on the callee to gather information and
-    // merge that into the current state. The latter happens in the updateImpl.
-    auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
-    const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
-    if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
-      // Unknown caller or declarations are not analyzable, we give up.
-      if (!Callee || !A.isFunctionIPOAmendable(*Callee)) {
-
-        // Unknown callees might contain parallel regions, except if they have
-        // an appropriate assumption attached.
-        if (!(HasAssumption(Callee, "omp_no_openmp") ||
-              HasAssumption(Callee, "omp_no_parallelism")))
-          ReachedUnknownParallelRegions.insert(&CB);
-
-        // If SPMDCompatibilityTracker is not fixed, we need to give up on the
-        // idea we can run something unknown in SPMD-mode.
-        if (!SPMDCompatibilityTracker.isAtFixpoint())
-          SPMDCompatibilityTracker.insert(&CB);
-
-        // We have updated the state for this unknown call properly, there won't
-        // be any change so we indicate a fixpoint.
-        indicateOptimisticFixpoint();
-      }
-      // If the callee is known and can be used in IPO, we will update the state
-      // based on the callee state in updateImpl.
-      return;
-    }
-
-    const unsigned int WrapperFunctionArgNo = 6;
-    RuntimeFunction RF = It->getSecond();
-    switch (RF) {
-    // All the functions we know are compatible with SPMD mode.
-    case OMPRTL___kmpc_is_spmd_exec_mode:
-    case OMPRTL___kmpc_for_static_fini:
-    case OMPRTL___kmpc_global_thread_num:
-    case OMPRTL___kmpc_single:
-    case OMPRTL___kmpc_end_single:
-    case OMPRTL___kmpc_master:
-    case OMPRTL___kmpc_end_master:
-    case OMPRTL___kmpc_barrier:
-      break;
-    case OMPRTL___kmpc_for_static_init_4:
-    case OMPRTL___kmpc_for_static_init_4u:
-    case OMPRTL___kmpc_for_static_init_8:
-    case OMPRTL___kmpc_for_static_init_8u: {
-      // Check the schedule and allow static schedule in SPMD mode.
-      unsigned ScheduleArgOpNo = 2;
-      auto *ScheduleTypeCI =
-          dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo));
-      unsigned ScheduleTypeVal =
-          ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0;
-      switch (OMPScheduleType(ScheduleTypeVal)) {
-      case OMPScheduleType::Static:
-      case OMPScheduleType::StaticChunked:
-      case OMPScheduleType::Distribute:
-      case OMPScheduleType::DistributeChunked:
-        break;
-      default:
-        SPMDCompatibilityTracker.insert(&CB);
-        break;
-      };
-    } break;
-    case OMPRTL___kmpc_target_init:
-      KernelInitCB = &CB;
-      break;
-    case OMPRTL___kmpc_target_deinit:
-      KernelDeinitCB = &CB;
-      break;
-    case OMPRTL___kmpc_parallel_51:
-      if (auto *ParallelRegion = dyn_cast<Function>(
-              CB.getArgOperand(WrapperFunctionArgNo)->stripPointerCasts())) {
-        ReachedKnownParallelRegions.insert(ParallelRegion);
-        break;
-      }
-      // The condition above should usually get the parallel region function
-      // pointer and record it. In the off chance it doesn't we assume the
-      // worst.
-      ReachedUnknownParallelRegions.insert(&CB);
-      break;
-    case OMPRTL___kmpc_omp_task:
-      // We do not look into tasks right now, just give up.
-      SPMDCompatibilityTracker.insert(&CB);
-      ReachedUnknownParallelRegions.insert(&CB);
-      break;
-    default:
-      // Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
-      // generally.
-      SPMDCompatibilityTracker.insert(&CB);
-      break;
-    }
-    // All other OpenMP runtime calls will not reach parallel regions so they
-    // can be safely ignored for now. Since it is a known OpenMP runtime call we
-    // have now modeled all effects and there is no need for any update.
-    indicateOptimisticFixpoint();
-  }
-
-  ChangeStatus updateImpl(Attributor &A) override {
-    // TODO: Once we have call site specific value information we can provide
-    //       call site specific liveness information and then it makes
-    //       sense to specialize attributes for call sites arguments instead of
-    //       redirecting requests to the callee argument.
-    Function *F = getAssociatedFunction();
-    const IRPosition &FnPos = IRPosition::function(*F);
-    auto &FnAA = A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED);
-    if (getState() == FnAA.getState())
-      return ChangeStatus::UNCHANGED;
-    getState() = FnAA.getState();
-    return ChangeStatus::CHANGED;
-  }
-};
-
 } // namespace
 
-void OpenMPOpt::registerAAs(bool IsModulePass) {
-  if (SCC.empty())
-
-    return;
-  if (IsModulePass) {
-    // Ensure we create the AAKernelInfo AAs first and without triggering an
-    // update. This will make sure we register all value simplification
-    // callbacks before any other AA has the chance to create an AAValueSimplify
-    // or similar.
-    for (Function *Kernel : OMPInfoCache.Kernels)
-      A.getOrCreateAAFor<AAKernelInfo>(
-          IRPosition::function(*Kernel), /* QueryingAA */ nullptr,
-          DepClassTy::NONE, /* ForceUpdate */ false,
-          /* UpdateAfterInit */ false);
-  }
-
-  // Create CallSite AA for all Getters.
-  for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
-    auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
-
-    auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
-
-    auto CreateAA = [&](Use &U, Function &Caller) {
-      CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
-      if (!CI)
-        return false;
-
-      auto &CB = cast<CallBase>(*CI);
-
-      IRPosition CBPos = IRPosition::callsite_function(CB);
-      A.getOrCreateAAFor<AAICVTracker>(CBPos);
-      return false;
-    };
-
-    GetterRFI.foreachUse(SCC, CreateAA);
-  }
-  auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
-  auto CreateAA = [&](Use &U, Function &F) {
-    A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
-    return false;
-  };
-  GlobalizationRFI.foreachUse(SCC, CreateAA);
-
-  // Create an ExecutionDomain AA for every function and a HeapToStack AA for
-  // every function if there is a device kernel.
-  for (auto *F : SCC) {
-    if (!F->isDeclaration())
-      A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
-    if (isOpenMPDevice(M))
-      A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
-  }
-}
-
 const char AAICVTracker::ID = 0;
-const char AAKernelInfo::ID = 0;
 const char AAExecutionDomain::ID = 0;
 const char AAHeapToShared::ID = 0;
 
@@ -3505,28 +2628,6 @@ AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
   return *AA;
 }
 
-AAKernelInfo &AAKernelInfo::createForPosition(const IRPosition &IRP,
-                                              Attributor &A) {
-  AAKernelInfo *AA = nullptr;
-  switch (IRP.getPositionKind()) {
-  case IRPosition::IRP_INVALID:
-  case IRPosition::IRP_FLOAT:
-  case IRPosition::IRP_ARGUMENT:
-  case IRPosition::IRP_RETURNED:
-  case IRPosition::IRP_CALL_SITE_RETURNED:
-  case IRPosition::IRP_CALL_SITE_ARGUMENT:
-    llvm_unreachable("KernelInfo can only be created for function position!");
-  case IRPosition::IRP_CALL_SITE:
-    AA = new (A.Allocator) AAKernelInfoCallSite(IRP, A);
-    break;
-  case IRPosition::IRP_FUNCTION:
-    AA = new (A.Allocator) AAKernelInfoFunction(IRP, A);
-    break;
-  }
-
-  return *AA;
-}
-
 PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
   if (!containsOpenMP(M))
     return PreservedAnalyses::all();

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
index f1a6f60d0a00..2fdaa8be8541 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=13 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=13 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
index 0833a4d88389..7382ea8d345e 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
@@ -11,7 +11,7 @@ define internal i32 @f(%struct.ss* byval(%struct.ss) %b, i32* byval(i32) %X, i32
 ;
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@f
-; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval([[STRUCT_SS:%.*]]) align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree noundef nonnull byval(i32) align 4 dereferenceable(4) [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree noundef nonnull byval([[STRUCT_SS:%.*]]) align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree noundef nonnull byval(i32) align 4 dereferenceable(4) [[X:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
 ; IS__TUNIT_OPM-NEXT:  entry:
 ; IS__TUNIT_OPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B]], i32 0, i32 0
 ; IS__TUNIT_OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
@@ -24,7 +24,7 @@ define internal i32 @f(%struct.ss* byval(%struct.ss) %b, i32* byval(i32) %X, i32
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@f
-; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
 ; IS__TUNIT_NPM-NEXT:  entry:
 ; IS__TUNIT_NPM-NEXT:    [[X_PRIV:%.*]] = alloca i32, align 4
 ; IS__TUNIT_NPM-NEXT:    store i32 [[TMP2]], i32* [[X_PRIV]], align 4
@@ -100,7 +100,7 @@ define i32 @test(i32* %X) {
 ; IS__TUNIT_OPM-NEXT:    store i32 1, i32* [[TMP1]], align 8
 ; IS__TUNIT_OPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; IS__TUNIT_OPM-NEXT:    store i64 2, i64* [[TMP4]], align 4
-; IS__TUNIT_OPM-NEXT:    [[C:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree noundef nonnull readonly byval([[STRUCT_SS]]) align 8 dereferenceable(12) [[S]], i32* nocapture nofree readonly byval(i32) align 4 [[X]]) #[[ATTR0]]
+; IS__TUNIT_OPM-NEXT:    [[C:%.*]] = call i32 @f(%struct.ss* noalias nocapture nofree noundef nonnull readonly byval([[STRUCT_SS]]) align 8 dereferenceable(12) [[S]], i32* nocapture nofree readonly byval(i32) align 4 [[X]], i32 noundef zeroext 0) #[[ATTR0]]
 ; IS__TUNIT_OPM-NEXT:    ret i32 [[C]]
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
@@ -117,7 +117,7 @@ define i32 @test(i32* %X) {
 ; IS__TUNIT_NPM-NEXT:    [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8
 ; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = load i32, i32* [[X]], align 4
-; IS__TUNIT_NPM-NEXT:    [[C:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]]) #[[ATTR0]]
+; IS__TUNIT_NPM-NEXT:    [[C:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]], i32 noundef zeroext 0) #[[ATTR0]]
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[C]]
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
index de507064beb8..36086e9b4bd4 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 
@@ -9,7 +9,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
 define internal i32 @callee(i1 %C, i32* %P) {
 ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@callee
-; IS__TUNIT_OPM-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__TUNIT_OPM-SAME: (i1 noundef [[C:%.*]], i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0:[0-9]+]] {
 ; IS__TUNIT_OPM-NEXT:    br label [[F:%.*]]
 ; IS__TUNIT_OPM:       T:
 ; IS__TUNIT_OPM-NEXT:    unreachable
@@ -19,7 +19,7 @@ define internal i32 @callee(i1 %C, i32* %P) {
 ;
 ; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee
-; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__TUNIT_NPM-SAME: (i1 noundef [[C:%.*]], i32 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
 ; IS__TUNIT_NPM-NEXT:    [[P_PRIV:%.*]] = alloca i32, align 4
 ; IS__TUNIT_NPM-NEXT:    store i32 [[TMP0]], i32* [[P_PRIV]], align 4
 ; IS__TUNIT_NPM-NEXT:    br label [[F:%.*]]
@@ -67,7 +67,7 @@ define i32 @foo() {
 ; IS__TUNIT_OPM-SAME: () #[[ATTR1:[0-9]+]] {
 ; IS__TUNIT_OPM-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; IS__TUNIT_OPM-NEXT:    store i32 17, i32* [[A]], align 4
-; IS__TUNIT_OPM-NEXT:    [[X:%.*]] = call i32 @callee(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]]) #[[ATTR2:[0-9]+]]
+; IS__TUNIT_OPM-NEXT:    [[X:%.*]] = call i32 @callee(i1 noundef false, i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A]]) #[[ATTR2:[0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    ret i32 [[X]]
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
@@ -76,7 +76,7 @@ define i32 @foo() {
 ; IS__TUNIT_NPM-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; IS__TUNIT_NPM-NEXT:    store i32 17, i32* [[A]], align 4
 ; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A]], align 4
-; IS__TUNIT_NPM-NEXT:    [[X:%.*]] = call i32 @callee(i32 [[TMP1]]) #[[ATTR2:[0-9]+]]
+; IS__TUNIT_NPM-NEXT:    [[X:%.*]] = call i32 @callee(i1 noundef false, i32 [[TMP1]]) #[[ATTR2:[0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[X]]
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
index e3d4a5586952..4b9b6577e765 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
@@ -21,18 +21,31 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: @[[A:[a-zA-Z0-9_$"\\.-]+]] = internal global [[STRUCT_FOO:%.*]] { i32 1, i64 2 }, align 8
 ;.
 define void @run() {
+; IS________OPM: Function Attrs: nofree noreturn nosync nounwind readnone
+; IS________OPM-LABEL: define {{[^@]+}}@run
+; IS________OPM-SAME: () #[[ATTR0:[0-9]+]] {
+; IS________OPM-NEXT:  entry:
+; IS________OPM-NEXT:    [[TMP0:%.*]] = call i64 @CaptureAStruct(%struct.Foo* nocapture nofree noundef nonnull readonly byval([[STRUCT_FOO:%.*]]) align 8 dereferenceable(16) @a) #[[ATTR0]]
+; IS________OPM-NEXT:    unreachable
 ;
-; NOT_CGSCC_NPM: Function Attrs: nofree noreturn nosync nounwind readnone
-; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@run
-; NOT_CGSCC_NPM-SAME: () #[[ATTR0:[0-9]+]] {
-; NOT_CGSCC_NPM-NEXT:  entry:
-; NOT_CGSCC_NPM-NEXT:    [[TMP0:%.*]] = call i64 @CaptureAStruct() #[[ATTR0]]
-; NOT_CGSCC_NPM-NEXT:    unreachable
+; IS__TUNIT_NPM: Function Attrs: nofree noreturn nosync nounwind readnone
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@run
+; IS__TUNIT_NPM-SAME: () #[[ATTR0:[0-9]+]] {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    [[A_CAST:%.*]] = bitcast %struct.Foo* @a to i32*
+; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_CAST]], align 8
+; IS__TUNIT_NPM-NEXT:    [[A_0_1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* @a, i32 0, i32 1
+; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_0_1]], align 8
+; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = call i64 @CaptureAStruct(i32 [[TMP0]], i64 [[TMP1]]) #[[ATTR0]]
+; IS__TUNIT_NPM-NEXT:    unreachable
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn
+; IS__CGSCC____: Function Attrs: nofree norecurse noreturn nosync nounwind readonly willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@run
 ; IS__CGSCC____-SAME: () #[[ATTR0:[0-9]+]] {
 ; IS__CGSCC____-NEXT:  entry:
+; IS__CGSCC____-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_FOO:%.*]], %struct.Foo* @a, i32 0, i32 0), align 8
+; IS__CGSCC____-NEXT:    [[A_0_1:%.*]] = getelementptr [[STRUCT_FOO]], %struct.Foo* @a, i32 0, i32 1
+; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_0_1]], align 8
 ; IS__CGSCC____-NEXT:    unreachable
 ;
 entry:
@@ -92,28 +105,53 @@ define internal i64 @CaptureAStruct(%struct.Foo* byval(%struct.Foo) %a) {
 ; IS__CGSCC_OPM-NEXT:    [[GEP]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* [[A]], i64 0
 ; IS__CGSCC_OPM-NEXT:    br label [[LOOP]]
 ;
-; NOT_CGSCC_NPM: Function Attrs: nofree noreturn nosync nounwind readnone
-; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@CaptureAStruct
-; NOT_CGSCC_NPM-SAME: () #[[ATTR0]] {
-; NOT_CGSCC_NPM-NEXT:  entry:
-; NOT_CGSCC_NPM-NEXT:    [[A_PTR:%.*]] = alloca %struct.Foo*, align 8
-; NOT_CGSCC_NPM-NEXT:    br label [[LOOP:%.*]]
-; NOT_CGSCC_NPM:       loop:
-; NOT_CGSCC_NPM-NEXT:    [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ @a, [[LOOP]] ]
-; NOT_CGSCC_NPM-NEXT:    [[TMP0:%.*]] = phi %struct.Foo* [ @a, [[ENTRY]] ], [ [[TMP0]], [[LOOP]] ]
-; NOT_CGSCC_NPM-NEXT:    store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]], align 8
-; NOT_CGSCC_NPM-NEXT:    br label [[LOOP]]
+; IS________OPM: Function Attrs: nofree noreturn nosync nounwind readnone
+; IS________OPM-LABEL: define {{[^@]+}}@CaptureAStruct
+; IS________OPM-SAME: (%struct.Foo* noalias nofree noundef nonnull byval([[STRUCT_FOO:%.*]]) align 8 dereferenceable(16) [[A:%.*]]) #[[ATTR0]] {
+; IS________OPM-NEXT:  entry:
+; IS________OPM-NEXT:    [[A_PTR:%.*]] = alloca %struct.Foo*, align 8
+; IS________OPM-NEXT:    br label [[LOOP:%.*]]
+; IS________OPM:       loop:
+; IS________OPM-NEXT:    [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ]
+; IS________OPM-NEXT:    [[TMP0:%.*]] = phi %struct.Foo* [ [[A]], [[ENTRY]] ], [ [[TMP0]], [[LOOP]] ]
+; IS________OPM-NEXT:    store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]], align 8
+; IS________OPM-NEXT:    [[GEP]] = getelementptr [[STRUCT_FOO]], %struct.Foo* [[A]], i64 0
+; IS________OPM-NEXT:    br label [[LOOP]]
+;
+; IS__TUNIT_NPM: Function Attrs: nofree noreturn nosync nounwind readnone
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@CaptureAStruct
+; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0]] {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    [[A_PRIV:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
+; IS__TUNIT_NPM-NEXT:    [[A_PRIV_CAST:%.*]] = bitcast %struct.Foo* [[A_PRIV]] to i32*
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP0]], i32* [[A_PRIV_CAST]], align 4
+; IS__TUNIT_NPM-NEXT:    [[A_PRIV_0_1:%.*]] = getelementptr [[STRUCT_FOO]], %struct.Foo* [[A_PRIV]], i32 0, i32 1
+; IS__TUNIT_NPM-NEXT:    store i64 [[TMP1]], i64* [[A_PRIV_0_1]], align 8
+; IS__TUNIT_NPM-NEXT:    [[A_PTR:%.*]] = alloca %struct.Foo*, align 8
+; IS__TUNIT_NPM-NEXT:    br label [[LOOP:%.*]]
+; IS__TUNIT_NPM:       loop:
+; IS__TUNIT_NPM-NEXT:    [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ]
+; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = phi %struct.Foo* [ [[A_PRIV]], [[ENTRY]] ], [ [[TMP2]], [[LOOP]] ]
+; IS__TUNIT_NPM-NEXT:    store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]], align 8
+; IS__TUNIT_NPM-NEXT:    [[GEP]] = getelementptr [[STRUCT_FOO]], %struct.Foo* [[A_PRIV]], i64 0
+; IS__TUNIT_NPM-NEXT:    br label [[LOOP]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse noreturn nosync nounwind readnone
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@CaptureAStruct
-; IS__CGSCC____-SAME: () #[[ATTR2:[0-9]+]] {
+; IS__CGSCC____-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
 ; IS__CGSCC____-NEXT:  entry:
+; IS__CGSCC____-NEXT:    [[A_PRIV:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
+; IS__CGSCC____-NEXT:    [[A_PRIV_CAST:%.*]] = bitcast %struct.Foo* [[A_PRIV]] to i32*
+; IS__CGSCC____-NEXT:    store i32 [[TMP0]], i32* [[A_PRIV_CAST]], align 8
+; IS__CGSCC____-NEXT:    [[A_PRIV_0_1:%.*]] = getelementptr [[STRUCT_FOO]], %struct.Foo* [[A_PRIV]], i32 0, i32 1
+; IS__CGSCC____-NEXT:    store i64 [[TMP1]], i64* [[A_PRIV_0_1]], align 8
 ; IS__CGSCC____-NEXT:    [[A_PTR:%.*]] = alloca %struct.Foo*, align 8
 ; IS__CGSCC____-NEXT:    br label [[LOOP:%.*]]
 ; IS__CGSCC____:       loop:
-; IS__CGSCC____-NEXT:    [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ @a, [[LOOP]] ]
-; IS__CGSCC____-NEXT:    [[TMP0:%.*]] = phi %struct.Foo* [ @a, [[ENTRY]] ], [ [[TMP0]], [[LOOP]] ]
+; IS__CGSCC____-NEXT:    [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ]
+; IS__CGSCC____-NEXT:    [[TMP2:%.*]] = phi %struct.Foo* [ [[A_PRIV]], [[ENTRY]] ], [ [[TMP2]], [[LOOP]] ]
 ; IS__CGSCC____-NEXT:    store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]], align 8
+; IS__CGSCC____-NEXT:    [[GEP]] = getelementptr [[STRUCT_FOO]], %struct.Foo* [[A_PRIV]], i64 0
 ; IS__CGSCC____-NEXT:    br label [[LOOP]]
 ;
 entry:
@@ -128,9 +166,9 @@ loop:
   br label %loop
 }
 ;.
-; NOT_CGSCC_NPM: attributes #[[ATTR0]] = { nofree noreturn nosync nounwind readnone }
+; NOT_CGSCC_NPM: attributes #[[ATTR0:[0-9]+]] = { nofree noreturn nosync nounwind readnone }
 ;.
-; IS__CGSCC____: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR0]] = { nofree norecurse noreturn nosync nounwind readonly willreturn }
 ; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn }
 ; IS__CGSCC____: attributes #[[ATTR2]] = { nofree norecurse noreturn nosync nounwind readnone }
 ;.

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
index 77db257d9c22..def445504fa0 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
index 4268688c4fc6..eb593ae7edbf 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll
index 18ab6b300fcf..ab8c0843cb25 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/reserve-tbaa.ll
@@ -26,7 +26,7 @@
 define internal fastcc void @fn(i32* nocapture readonly %p1, i64* nocapture readonly %p2) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@fn
-; IS__TUNIT____-SAME: () #[[ATTR0:[0-9]+]] {
+; IS__TUNIT____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P1:%.*]]) #[[ATTR0:[0-9]+]] {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    [[TMP0:%.*]] = load i32, i32* @g, align 4, !tbaa [[TBAA0:![0-9]+]]
 ; IS__TUNIT____-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
@@ -60,7 +60,7 @@ define i32 @main() {
 ; IS__TUNIT____-NEXT:    store i32* @g, i32** [[TMP0]], align 8, !tbaa [[TBAA5]]
 ; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = load i32*, i32** @a, align 8, !tbaa [[TBAA5]]
 ; IS__TUNIT____-NEXT:    store i32 1, i32* [[TMP1]], align 4, !tbaa [[TBAA0]]
-; IS__TUNIT____-NEXT:    call fastcc void @fn() #[[ATTR0]]
+; IS__TUNIT____-NEXT:    call fastcc void @fn(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) @g) #[[ATTR0]]
 ; IS__TUNIT____-NEXT:    ret i32 0
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll
index e36f11acb4ed..0e1a00c3bcb2 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/sret.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
index a24511fed5f3..563d58387e5b 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
@@ -53,16 +53,32 @@ return:                                           ; preds = %entry
 }
 
 define internal i32 @vfu2(%struct.MYstr* byval(%struct.MYstr) align 4 %u) nounwind readonly {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readonly willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@vfu2
-; IS__TUNIT____-SAME: () #[[ATTR0:[0-9]+]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
-; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; IS__TUNIT____-NEXT:    [[TMP2:%.*]] = load i8, i8* getelementptr inbounds ([[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 0), align 8
-; IS__TUNIT____-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
-; IS__TUNIT____-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], [[TMP1]]
-; IS__TUNIT____-NEXT:    ret i32 [[TMP4]]
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readonly willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@vfu2
+; IS__TUNIT_OPM-SAME: (%struct.MYstr* noalias nocapture nofree noundef nonnull readonly byval([[STRUCT_MYSTR:%.*]]) align 8 dereferenceable(8) [[U:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__TUNIT_OPM-NEXT:  entry:
+; IS__TUNIT_OPM-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
+; IS__TUNIT_OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+; IS__TUNIT_OPM-NEXT:    [[TMP2:%.*]] = load i8, i8* getelementptr inbounds ([[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 0), align 8
+; IS__TUNIT_OPM-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
+; IS__TUNIT_OPM-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], [[TMP1]]
+; IS__TUNIT_OPM-NEXT:    ret i32 [[TMP4]]
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readonly willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@vfu2
+; IS__TUNIT_NPM-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]], align 8
+; IS__TUNIT_NPM-NEXT:    [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
+; IS__TUNIT_NPM-NEXT:    store i8 [[TMP0]], i8* [[U_PRIV_CAST]], align 1
+; IS__TUNIT_NPM-NEXT:    [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP1]], i32* [[U_PRIV_0_1]], align 4
+; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
+; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4
+; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = load i8, i8* getelementptr inbounds ([[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 0), align 8
+; IS__TUNIT_NPM-NEXT:    [[TMP5:%.*]] = zext i8 [[TMP4]] to i32
+; IS__TUNIT_NPM-NEXT:    [[TMP6:%.*]] = add i32 [[TMP5]], [[TMP3]]
+; IS__TUNIT_NPM-NEXT:    ret i32 [[TMP6]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@vfu2
@@ -86,12 +102,23 @@ entry:
 }
 
 define i32 @unions() nounwind {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@unions
-; IS__TUNIT____-SAME: () #[[ATTR1:[0-9]+]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[RESULT:%.*]] = call i32 @vfu2() #[[ATTR0]]
-; IS__TUNIT____-NEXT:    ret i32 [[RESULT]]
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@unions
+; IS__TUNIT_OPM-SAME: () #[[ATTR1:[0-9]+]] {
+; IS__TUNIT_OPM-NEXT:  entry:
+; IS__TUNIT_OPM-NEXT:    [[RESULT:%.*]] = call i32 @vfu2(%struct.MYstr* nocapture nofree noundef nonnull readonly byval([[STRUCT_MYSTR:%.*]]) align 8 dereferenceable(8) @mystr) #[[ATTR0]]
+; IS__TUNIT_OPM-NEXT:    ret i32 [[RESULT]]
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@unions
+; IS__TUNIT_NPM-SAME: () #[[ATTR1:[0-9]+]] {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
+; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8
+; IS__TUNIT_NPM-NEXT:    [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
+; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8
+; IS__TUNIT_NPM-NEXT:    [[RESULT:%.*]] = call i32 @vfu2(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR0]]
+; IS__TUNIT_NPM-NEXT:    ret i32 [[RESULT]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@unions
@@ -227,8 +254,8 @@ entry:
   ret i32 %result
 }
 ;.
-; IS__TUNIT____: attributes #[[ATTR0]] = { nofree nosync nounwind readonly willreturn }
-; IS__TUNIT____: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn }
+; IS__TUNIT____: attributes #[[ATTR0:[0-9]+]] = { nofree nosync nounwind readonly willreturn }
+; IS__TUNIT____: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind willreturn }
 ; IS__TUNIT____: attributes #[[ATTR2:[0-9]+]] = { nofree nosync nounwind readnone willreturn }
 ;.
 ; IS__CGSCC____: attributes #[[ATTR0:[0-9]+]] = { nofree norecurse nosync nounwind readnone willreturn }

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll
index a5765ed3e9d7..42db14f0bc04 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll
@@ -8,20 +8,37 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
 define i64 @fn2() {
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@fn2
+; IS__TUNIT_OPM-SAME: () #[[ATTR0:[0-9]+]] {
+; IS__TUNIT_OPM-NEXT:  entry:
+; IS__TUNIT_OPM-NEXT:    [[CALL2:%.*]] = call i64 @fn1(i64 undef) #[[ATTR0]]
+; IS__TUNIT_OPM-NEXT:    ret i64 [[CALL2]]
 ;
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@fn2
-; IS__TUNIT____-SAME: () #[[ATTR0:[0-9]+]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    ret i64 undef
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@fn2
+; IS__TUNIT_NPM-SAME: () #[[ATTR0:[0-9]+]] {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    [[CALL2:%.*]] = call i64 @fn1(i64 undef) #[[ATTR0]], !range [[RNG0:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    ret i64 [[CALL2]]
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@fn2
-; IS__CGSCC____-SAME: () #[[ATTR0:[0-9]+]] {
-; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[CONV:%.*]] = sext i32 undef to i64
-; IS__CGSCC____-NEXT:    [[DIV:%.*]] = sdiv i64 8, 0
-; IS__CGSCC____-NEXT:    ret i64 undef
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@fn2
+; IS__CGSCC_OPM-SAME: () #[[ATTR0:[0-9]+]] {
+; IS__CGSCC_OPM-NEXT:  entry:
+; IS__CGSCC_OPM-NEXT:    [[CONV:%.*]] = sext i32 undef to i64
+; IS__CGSCC_OPM-NEXT:    [[DIV:%.*]] = sdiv i64 8, 0
+; IS__CGSCC_OPM-NEXT:    [[CALL2:%.*]] = call i64 @fn1(i64 undef) #[[ATTR1:[0-9]+]]
+; IS__CGSCC_OPM-NEXT:    ret i64 [[CALL2]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@fn2
+; IS__CGSCC_NPM-SAME: () #[[ATTR0:[0-9]+]] {
+; IS__CGSCC_NPM-NEXT:  entry:
+; IS__CGSCC_NPM-NEXT:    [[CONV:%.*]] = sext i32 undef to i64
+; IS__CGSCC_NPM-NEXT:    [[DIV:%.*]] = sdiv i64 8, 0
+; IS__CGSCC_NPM-NEXT:    [[CALL2:%.*]] = call i64 @fn1(i64 undef) #[[ATTR1:[0-9]+]], !range [[RNG0:![0-9]+]]
+; IS__CGSCC_NPM-NEXT:    ret i64 [[CALL2]]
 ;
 entry:
   %conv = sext i32 undef to i64
@@ -31,7 +48,6 @@ entry:
 }
 
 define i64 @fn2b(i32 %arg) {
-;
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@fn2b
 ; IS__TUNIT_OPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] {
@@ -47,24 +63,17 @@ define i64 @fn2b(i32 %arg) {
 ; IS__TUNIT_NPM-NEXT:  entry:
 ; IS__TUNIT_NPM-NEXT:    [[CONV:%.*]] = sext i32 [[ARG]] to i64
 ; IS__TUNIT_NPM-NEXT:    [[DIV:%.*]] = sdiv i64 8, [[CONV]]
-; IS__TUNIT_NPM-NEXT:    ret i64 [[DIV]]
+; IS__TUNIT_NPM-NEXT:    [[CALL2:%.*]] = call i64 @fn1(i64 [[DIV]]) #[[ATTR0]], !range [[RNG0]]
+; IS__TUNIT_NPM-NEXT:    ret i64 [[CALL2]]
 ;
-; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@fn2b
-; IS__CGSCC_OPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] {
-; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    [[CONV:%.*]] = sext i32 [[ARG]] to i64
-; IS__CGSCC_OPM-NEXT:    [[DIV:%.*]] = sdiv i64 8, [[CONV]]
-; IS__CGSCC_OPM-NEXT:    [[CALL2:%.*]] = call i64 @fn1(i64 [[DIV]]) #[[ATTR1:[0-9]+]]
-; IS__CGSCC_OPM-NEXT:    ret i64 [[CALL2]]
-;
-; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@fn2b
-; IS__CGSCC_NPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] {
-; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[CONV:%.*]] = sext i32 [[ARG]] to i64
-; IS__CGSCC_NPM-NEXT:    [[DIV:%.*]] = sdiv i64 8, [[CONV]]
-; IS__CGSCC_NPM-NEXT:    ret i64 [[DIV]]
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC____-LABEL: define {{[^@]+}}@fn2b
+; IS__CGSCC____-SAME: (i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__CGSCC____-NEXT:  entry:
+; IS__CGSCC____-NEXT:    [[CONV:%.*]] = sext i32 [[ARG]] to i64
+; IS__CGSCC____-NEXT:    [[DIV:%.*]] = sdiv i64 8, [[CONV]]
+; IS__CGSCC____-NEXT:    [[CALL2:%.*]] = call i64 @fn1(i64 [[DIV]]) #[[ATTR1:[0-9]+]], !range [[RNG0:![0-9]+]]
+; IS__CGSCC____-NEXT:    ret i64 [[CALL2]]
 ;
 entry:
   %conv = sext i32 %arg to i64
@@ -74,23 +83,35 @@ entry:
 }
 
 define i64 @fn2c() {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@fn2c
-; IS__TUNIT____-SAME: () #[[ATTR0]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    ret i64 42
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@fn2c
+; IS__TUNIT_OPM-SAME: () #[[ATTR0]] {
+; IS__TUNIT_OPM-NEXT:  entry:
+; IS__TUNIT_OPM-NEXT:    [[CALL2:%.*]] = call i64 @fn1(i64 noundef 42) #[[ATTR0]]
+; IS__TUNIT_OPM-NEXT:    ret i64 [[CALL2]]
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@fn2c
+; IS__TUNIT_NPM-SAME: () #[[ATTR0]] {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    [[CALL2:%.*]] = call i64 @fn1(i64 noundef 42) #[[ATTR0]], !range [[RNG0]]
+; IS__TUNIT_NPM-NEXT:    ret i64 [[CALL2]]
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@fn2c
 ; IS__CGSCC_OPM-SAME: () #[[ATTR0]] {
 ; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    ret i64 42
+; IS__CGSCC_OPM-NEXT:    [[CONV:%.*]] = sext i32 undef to i64
+; IS__CGSCC_OPM-NEXT:    [[ADD:%.*]] = add i64 42, 0
+; IS__CGSCC_OPM-NEXT:    [[CALL2:%.*]] = call i64 @fn1(i64 noundef 42) #[[ATTR1]]
+; IS__CGSCC_OPM-NEXT:    ret i64 [[CALL2]]
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@fn2c
 ; IS__CGSCC_NPM-SAME: () #[[ATTR0]] {
 ; IS__CGSCC_NPM-NEXT:  entry:
 ; IS__CGSCC_NPM-NEXT:    [[CONV:%.*]] = sext i32 undef to i64
+; IS__CGSCC_NPM-NEXT:    [[ADD:%.*]] = add i64 42, 0
 ; IS__CGSCC_NPM-NEXT:    ret i64 42
 ;
 entry:
@@ -101,23 +122,21 @@ entry:
 }
 
 define internal i64 @fn1(i64 %p1) {
-; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@fn1
-; IS__TUNIT_OPM-SAME: (i64 returned [[P1:%.*]]) #[[ATTR0]] {
-; IS__TUNIT_OPM-NEXT:  entry:
-; IS__TUNIT_OPM-NEXT:    ret i64 [[P1]]
-;
-; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@fn1
-; IS__CGSCC_OPM-SAME: (i64 returned [[P1:%.*]]) #[[ATTR0]] {
-; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    ret i64 [[P1]]
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT____-LABEL: define {{[^@]+}}@fn1
+; IS__TUNIT____-SAME: (i64 returned [[P1:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__TUNIT____-NEXT:  entry:
+; IS__TUNIT____-NEXT:    [[TOBOOL:%.*]] = icmp ne i64 [[P1]], 0
+; IS__TUNIT____-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 [[P1]], i64 [[P1]]
+; IS__TUNIT____-NEXT:    ret i64 [[COND]]
 ;
-; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@fn1
-; IS__CGSCC_NPM-SAME: (i64 [[P1:%.*]]) #[[ATTR0]] {
-; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    ret i64 undef
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC____-LABEL: define {{[^@]+}}@fn1
+; IS__CGSCC____-SAME: (i64 returned [[P1:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-NEXT:  entry:
+; IS__CGSCC____-NEXT:    [[TOBOOL:%.*]] = icmp ne i64 [[P1]], 0
+; IS__CGSCC____-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i64 [[P1]], i64 [[P1]]
+; IS__CGSCC____-NEXT:    ret i64 [[COND]]
 ;
 entry:
   %tobool = icmp ne i64 %p1, 0
@@ -127,8 +146,10 @@ entry:
 ;.
 ; IS__TUNIT____: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn }
 ;.
-; IS__CGSCC_OPM: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn }
-; IS__CGSCC_OPM: attributes #[[ATTR1]] = { readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR1]] = { readnone willreturn }
+;.
+; IS__TUNIT_NPM: [[RNG0]] = !{i64 -2147483606, i64 2147483690}
 ;.
-; IS__CGSCC_NPM: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn }
+; IS__CGSCC____: [[RNG0]] = !{i64 -8, i64 43}
 ;.

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll
index 0a96ff4852df..288e55874951 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll
@@ -8,67 +8,37 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define void @fn2(i32* %P, i1 %C) {
 ;
-; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@fn2
-; IS__TUNIT_OPM-SAME: (i32* nocapture nofree [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
-; IS__TUNIT_OPM-NEXT:  entry:
-; IS__TUNIT_OPM-NEXT:    br label [[IF_END:%.*]]
-; IS__TUNIT_OPM:       for.cond1:
-; IS__TUNIT_OPM-NEXT:    br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]]
-; IS__TUNIT_OPM:       if.end:
-; IS__TUNIT_OPM-NEXT:    [[E_2:%.*]] = phi i32* [ [[P]], [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ]
-; IS__TUNIT_OPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[E_2]], align 4
-; IS__TUNIT_OPM-NEXT:    [[CALL:%.*]] = call i32 @fn1(i32 [[TMP0]]) #[[ATTR3:[0-9]+]]
-; IS__TUNIT_OPM-NEXT:    store i32 [[CALL]], i32* [[P]], align 4
-; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND1]]
-; IS__TUNIT_OPM:       exit:
-; IS__TUNIT_OPM-NEXT:    ret void
-;
-; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@fn2
-; IS__TUNIT_NPM-SAME: (i32* nocapture nofree [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
-; IS__TUNIT_NPM-NEXT:  entry:
-; IS__TUNIT_NPM-NEXT:    br label [[IF_END:%.*]]
-; IS__TUNIT_NPM:       for.cond1:
-; IS__TUNIT_NPM-NEXT:    br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]]
-; IS__TUNIT_NPM:       if.end:
-; IS__TUNIT_NPM-NEXT:    [[E_2:%.*]] = phi i32* [ [[P]], [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ]
-; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[E_2]], align 4
-; IS__TUNIT_NPM-NEXT:    store i32 [[TMP0]], i32* [[P]], align 4
-; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND1]]
-; IS__TUNIT_NPM:       exit:
-; IS__TUNIT_NPM-NEXT:    ret void
-;
-; IS__CGSCC_OPM: Function Attrs: argmemonly nofree norecurse nosync nounwind
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@fn2
-; IS__CGSCC_OPM-SAME: (i32* nocapture nofree nonnull align 4 dereferenceable(4) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
-; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    br label [[IF_END:%.*]]
-; IS__CGSCC_OPM:       for.cond1:
-; IS__CGSCC_OPM-NEXT:    br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]]
-; IS__CGSCC_OPM:       if.end:
-; IS__CGSCC_OPM-NEXT:    [[E_2:%.*]] = phi i32* [ [[P]], [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ]
-; IS__CGSCC_OPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[E_2]], align 4
-; IS__CGSCC_OPM-NEXT:    [[CALL:%.*]] = call i32 @fn1(i32 [[TMP0]])
-; IS__CGSCC_OPM-NEXT:    store i32 [[CALL]], i32* [[P]], align 4
-; IS__CGSCC_OPM-NEXT:    br label [[FOR_COND1]]
-; IS__CGSCC_OPM:       exit:
-; IS__CGSCC_OPM-NEXT:    ret void
-;
-; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@fn2
-; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull align 4 dereferenceable(4) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
-; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    br label [[IF_END:%.*]]
-; IS__CGSCC_NPM:       for.cond1:
-; IS__CGSCC_NPM-NEXT:    br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]]
-; IS__CGSCC_NPM:       if.end:
-; IS__CGSCC_NPM-NEXT:    [[E_2:%.*]] = phi i32* [ [[P]], [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ]
-; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[E_2]], align 4
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[P]], align 4
-; IS__CGSCC_NPM-NEXT:    br label [[FOR_COND1]]
-; IS__CGSCC_NPM:       exit:
-; IS__CGSCC_NPM-NEXT:    ret void
+; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind
+; IS__TUNIT____-LABEL: define {{[^@]+}}@fn2
+; IS__TUNIT____-SAME: (i32* nocapture nofree [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__TUNIT____-NEXT:  entry:
+; IS__TUNIT____-NEXT:    br label [[IF_END:%.*]]
+; IS__TUNIT____:       for.cond1:
+; IS__TUNIT____-NEXT:    br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]]
+; IS__TUNIT____:       if.end:
+; IS__TUNIT____-NEXT:    [[E_2:%.*]] = phi i32* [ [[P]], [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ]
+; IS__TUNIT____-NEXT:    [[TMP0:%.*]] = load i32, i32* [[E_2]], align 4
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32 @fn1(i32 [[TMP0]]) #[[ATTR3:[0-9]+]]
+; IS__TUNIT____-NEXT:    store i32 [[CALL]], i32* [[P]], align 4
+; IS__TUNIT____-NEXT:    br label [[FOR_COND1]]
+; IS__TUNIT____:       exit:
+; IS__TUNIT____-NEXT:    ret void
+;
+; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind
+; IS__CGSCC____-LABEL: define {{[^@]+}}@fn2
+; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull align 4 dereferenceable(4) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__CGSCC____-NEXT:  entry:
+; IS__CGSCC____-NEXT:    br label [[IF_END:%.*]]
+; IS__CGSCC____:       for.cond1:
+; IS__CGSCC____-NEXT:    br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]]
+; IS__CGSCC____:       if.end:
+; IS__CGSCC____-NEXT:    [[E_2:%.*]] = phi i32* [ [[P]], [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ]
+; IS__CGSCC____-NEXT:    [[TMP0:%.*]] = load i32, i32* [[E_2]], align 4
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32 @fn1(i32 [[TMP0]])
+; IS__CGSCC____-NEXT:    store i32 [[CALL]], i32* [[P]], align 4
+; IS__CGSCC____-NEXT:    br label [[FOR_COND1]]
+; IS__CGSCC____:       exit:
+; IS__CGSCC____-NEXT:    ret void
 ;
 entry:
   br label %if.end
@@ -87,23 +57,21 @@ exit:
 }
 
 define internal i32 @fn1(i32 %p1) {
-; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@fn1
-; IS__TUNIT_OPM-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1:[0-9]+]] {
-; IS__TUNIT_OPM-NEXT:  entry:
-; IS__TUNIT_OPM-NEXT:    ret i32 [[P1]]
-;
-; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@fn1
-; IS__CGSCC_OPM-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1:[0-9]+]] {
-; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    ret i32 [[P1]]
-;
-; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@fn1
-; IS__CGSCC_NPM-SAME: (i32 [[P1:%.*]]) #[[ATTR1:[0-9]+]] {
-; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    ret i32 undef
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT____-LABEL: define {{[^@]+}}@fn1
+; IS__TUNIT____-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1:[0-9]+]] {
+; IS__TUNIT____-NEXT:  entry:
+; IS__TUNIT____-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[P1]], 0
+; IS__TUNIT____-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 [[P1]], i32 [[P1]]
+; IS__TUNIT____-NEXT:    ret i32 [[COND]]
+;
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC____-LABEL: define {{[^@]+}}@fn1
+; IS__CGSCC____-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1:[0-9]+]] {
+; IS__CGSCC____-NEXT:  entry:
+; IS__CGSCC____-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[P1]], 0
+; IS__CGSCC____-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 [[P1]], i32 [[P1]]
+; IS__CGSCC____-NEXT:    ret i32 [[COND]]
 ;
 entry:
   %tobool = icmp ne i32 %p1, 0
@@ -113,67 +81,37 @@ entry:
 
 define void @fn_no_null_opt(i32* %P, i1 %C) null_pointer_is_valid {
 ;
-; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind null_pointer_is_valid
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@fn_no_null_opt
-; IS__TUNIT_OPM-SAME: (i32* nocapture nofree writeonly [[P:%.*]], i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] {
-; IS__TUNIT_OPM-NEXT:  entry:
-; IS__TUNIT_OPM-NEXT:    br label [[IF_END:%.*]]
-; IS__TUNIT_OPM:       for.cond1:
-; IS__TUNIT_OPM-NEXT:    br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]]
-; IS__TUNIT_OPM:       if.end:
-; IS__TUNIT_OPM-NEXT:    [[E_2:%.*]] = phi i32* [ undef, [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ]
-; IS__TUNIT_OPM-NEXT:    [[TMP0:%.*]] = load i32, i32* null, align 4
-; IS__TUNIT_OPM-NEXT:    [[CALL:%.*]] = call i32 @fn0(i32 [[TMP0]]) #[[ATTR3]]
-; IS__TUNIT_OPM-NEXT:    store i32 [[CALL]], i32* [[P]], align 4
-; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND1]]
-; IS__TUNIT_OPM:       exit:
-; IS__TUNIT_OPM-NEXT:    ret void
-;
-; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind null_pointer_is_valid
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@fn_no_null_opt
-; IS__TUNIT_NPM-SAME: (i32* nocapture nofree writeonly [[P:%.*]], i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] {
-; IS__TUNIT_NPM-NEXT:  entry:
-; IS__TUNIT_NPM-NEXT:    br label [[IF_END:%.*]]
-; IS__TUNIT_NPM:       for.cond1:
-; IS__TUNIT_NPM-NEXT:    br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]]
-; IS__TUNIT_NPM:       if.end:
-; IS__TUNIT_NPM-NEXT:    [[E_2:%.*]] = phi i32* [ undef, [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ]
-; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* null, align 4
-; IS__TUNIT_NPM-NEXT:    store i32 [[TMP0]], i32* [[P]], align 4
-; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND1]]
-; IS__TUNIT_NPM:       exit:
-; IS__TUNIT_NPM-NEXT:    ret void
-;
-; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@fn_no_null_opt
-; IS__CGSCC_OPM-SAME: (i32* nocapture nofree writeonly align 4 dereferenceable_or_null(4) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] {
-; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    br label [[IF_END:%.*]]
-; IS__CGSCC_OPM:       for.cond1:
-; IS__CGSCC_OPM-NEXT:    br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]]
-; IS__CGSCC_OPM:       if.end:
-; IS__CGSCC_OPM-NEXT:    [[E_2:%.*]] = phi i32* [ undef, [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ]
-; IS__CGSCC_OPM-NEXT:    [[TMP0:%.*]] = load i32, i32* null, align 4
-; IS__CGSCC_OPM-NEXT:    [[CALL:%.*]] = call i32 @fn0(i32 [[TMP0]])
-; IS__CGSCC_OPM-NEXT:    store i32 [[CALL]], i32* [[P]], align 4
-; IS__CGSCC_OPM-NEXT:    br label [[FOR_COND1]]
-; IS__CGSCC_OPM:       exit:
-; IS__CGSCC_OPM-NEXT:    ret void
-;
-; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@fn_no_null_opt
-; IS__CGSCC_NPM-SAME: (i32* nocapture nofree writeonly align 4 dereferenceable_or_null(4) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] {
-; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    br label [[IF_END:%.*]]
-; IS__CGSCC_NPM:       for.cond1:
-; IS__CGSCC_NPM-NEXT:    br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]]
-; IS__CGSCC_NPM:       if.end:
-; IS__CGSCC_NPM-NEXT:    [[E_2:%.*]] = phi i32* [ undef, [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ]
-; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* null, align 4
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[P]], align 4
-; IS__CGSCC_NPM-NEXT:    br label [[FOR_COND1]]
-; IS__CGSCC_NPM:       exit:
-; IS__CGSCC_NPM-NEXT:    ret void
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind null_pointer_is_valid
+; IS__TUNIT____-LABEL: define {{[^@]+}}@fn_no_null_opt
+; IS__TUNIT____-SAME: (i32* nocapture nofree writeonly [[P:%.*]], i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] {
+; IS__TUNIT____-NEXT:  entry:
+; IS__TUNIT____-NEXT:    br label [[IF_END:%.*]]
+; IS__TUNIT____:       for.cond1:
+; IS__TUNIT____-NEXT:    br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]]
+; IS__TUNIT____:       if.end:
+; IS__TUNIT____-NEXT:    [[E_2:%.*]] = phi i32* [ undef, [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ]
+; IS__TUNIT____-NEXT:    [[TMP0:%.*]] = load i32, i32* null, align 4
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32 @fn0(i32 [[TMP0]]) #[[ATTR3]]
+; IS__TUNIT____-NEXT:    store i32 [[CALL]], i32* [[P]], align 4
+; IS__TUNIT____-NEXT:    br label [[FOR_COND1]]
+; IS__TUNIT____:       exit:
+; IS__TUNIT____-NEXT:    ret void
+;
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid
+; IS__CGSCC____-LABEL: define {{[^@]+}}@fn_no_null_opt
+; IS__CGSCC____-SAME: (i32* nocapture nofree writeonly align 4 dereferenceable_or_null(4) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] {
+; IS__CGSCC____-NEXT:  entry:
+; IS__CGSCC____-NEXT:    br label [[IF_END:%.*]]
+; IS__CGSCC____:       for.cond1:
+; IS__CGSCC____-NEXT:    br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]]
+; IS__CGSCC____:       if.end:
+; IS__CGSCC____-NEXT:    [[E_2:%.*]] = phi i32* [ undef, [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ]
+; IS__CGSCC____-NEXT:    [[TMP0:%.*]] = load i32, i32* null, align 4
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32 @fn0(i32 [[TMP0]])
+; IS__CGSCC____-NEXT:    store i32 [[CALL]], i32* [[P]], align 4
+; IS__CGSCC____-NEXT:    br label [[FOR_COND1]]
+; IS__CGSCC____:       exit:
+; IS__CGSCC____-NEXT:    ret void
 ;
 entry:
   br label %if.end
@@ -192,23 +130,21 @@ exit:
 }
 
 define internal i32 @fn0(i32 %p1) {
-; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@fn0
-; IS__TUNIT_OPM-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1]] {
-; IS__TUNIT_OPM-NEXT:  entry:
-; IS__TUNIT_OPM-NEXT:    ret i32 [[P1]]
-;
-; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@fn0
-; IS__CGSCC_OPM-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1]] {
-; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    ret i32 [[P1]]
-;
-; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@fn0
-; IS__CGSCC_NPM-SAME: (i32 [[P1:%.*]]) #[[ATTR1]] {
-; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    ret i32 undef
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT____-LABEL: define {{[^@]+}}@fn0
+; IS__TUNIT____-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1]] {
+; IS__TUNIT____-NEXT:  entry:
+; IS__TUNIT____-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[P1]], 0
+; IS__TUNIT____-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 [[P1]], i32 [[P1]]
+; IS__TUNIT____-NEXT:    ret i32 [[COND]]
+;
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC____-LABEL: define {{[^@]+}}@fn0
+; IS__CGSCC____-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1]] {
+; IS__CGSCC____-NEXT:  entry:
+; IS__CGSCC____-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[P1]], 0
+; IS__CGSCC____-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL]], i32 [[P1]], i32 [[P1]]
+; IS__CGSCC____-NEXT:    ret i32 [[COND]]
 ;
 entry:
   %tobool = icmp ne i32 %p1, 0
@@ -216,15 +152,12 @@ entry:
   ret i32 %cond
 }
 ;.
-; IS__TUNIT_OPM: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind }
-; IS__TUNIT_OPM: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
-; IS__TUNIT_OPM: attributes #[[ATTR2]] = { nofree nosync nounwind null_pointer_is_valid }
-; IS__TUNIT_OPM: attributes #[[ATTR3]] = { nofree nosync nounwind readnone }
-;.
-; IS__TUNIT_NPM: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind }
-; IS__TUNIT_NPM: attributes #[[ATTR1]] = { nofree nosync nounwind null_pointer_is_valid }
+; IS__TUNIT____: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind }
+; IS__TUNIT____: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
+; IS__TUNIT____: attributes #[[ATTR2]] = { nofree nosync nounwind null_pointer_is_valid }
+; IS__TUNIT____: attributes #[[ATTR3]] = { nofree nosync nounwind readnone }
 ;.
-; IS__CGSCC____: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree norecurse nosync nounwind }
-; IS__CGSCC____: attributes #[[ATTR1:[0-9]+]] = { nofree norecurse nosync nounwind readnone willreturn }
-; IS__CGSCC____: attributes #[[ATTR2:[0-9]+]] = { nofree norecurse nosync nounwind null_pointer_is_valid }
+; IS__CGSCC____: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind }
+; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind null_pointer_is_valid }
 ;.

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll b/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll
index 159e064de07a..0965b9868856 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/arg-count-mismatch.ll
@@ -118,8 +118,8 @@ define dso_local i16 @vararg_tests(i16 %a) {
 define internal i16 @vararg_prop(i16 %p1, ...) {
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@vararg_prop
-; IS__CGSCC____-SAME: (i16 [[P1:%.*]], ...) #[[ATTR0]] {
-; IS__CGSCC____-NEXT:    ret i16 undef
+; IS__CGSCC____-SAME: (i16 returned [[P1:%.*]], ...) #[[ATTR0]] {
+; IS__CGSCC____-NEXT:    ret i16 7
 ;
   ret i16 %p1
 }
@@ -127,12 +127,12 @@ define internal i16 @vararg_prop(i16 %p1, ...) {
 define internal i16 @vararg_no_prop(i16 %p1, i16 %p2, ...) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@vararg_no_prop
-; IS__TUNIT____-SAME: (i16 [[P1:%.*]], i16 [[P2:%.*]], ...) #[[ATTR0]] {
+; IS__TUNIT____-SAME: (i16 returned [[P1:%.*]], i16 [[P2:%.*]], ...) #[[ATTR0]] {
 ; IS__TUNIT____-NEXT:    ret i16 7
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@vararg_no_prop
-; IS__CGSCC____-SAME: (i16 [[P1:%.*]], i16 [[P2:%.*]], ...) #[[ATTR0]] {
+; IS__CGSCC____-SAME: (i16 returned [[P1:%.*]], i16 [[P2:%.*]], ...) #[[ATTR0]] {
 ; IS__CGSCC____-NEXT:    ret i16 7
 ;
   ret i16 %p1

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll b/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll
index 35be623fb294..f335c012a78e 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/dangling-block-address.ll
@@ -54,7 +54,7 @@ define internal void @bar(i32* nocapture %pc) nounwind readonly {
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@bar
-; IS__CGSCC_NPM-SAME: () #[[ATTR1:[0-9]+]] {
+; IS__CGSCC_NPM-SAME: (i32* noalias nocapture nofree nonnull readonly align 536870912 dereferenceable(4294967295) [[PC:%.*]]) #[[ATTR1:[0-9]+]] {
 ; IS__CGSCC_NPM-NEXT:  entry:
 ; IS__CGSCC_NPM-NEXT:    br label [[INDIRECTGOTO:%.*]]
 ; IS__CGSCC_NPM:       lab0:

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll
index 86433b20ea45..354e86130e48 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/multiple_callbacks.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 ;
@@ -42,13 +42,13 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 define internal i32 @cb0(i32 %zero) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@cb0
-; IS__TUNIT____-SAME: (i32 [[ZERO:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__TUNIT____-SAME: (i32 returned [[ZERO:%.*]]) #[[ATTR0:[0-9]+]] {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    ret i32 0
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@cb0
-; IS__CGSCC____-SAME: (i32 [[ZERO:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__CGSCC____-SAME: (i32 returned [[ZERO:%.*]]) #[[ATTR0:[0-9]+]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    ret i32 0
 ;

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll b/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll
index 6131022a85d1..5ccec76faf24 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/musttail-call.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 ; PR36485
@@ -8,71 +8,40 @@
 
 declare i32 @external()
 
+; FIXME: We should not return undef here.
 define i8* @start(i8 %v) {
 ;
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@start
-; IS__TUNIT_OPM-SAME: (i8 [[V:%.*]]) {
-; IS__TUNIT_OPM-NEXT:    [[C1:%.*]] = icmp eq i8 [[V]], 0
-; IS__TUNIT_OPM-NEXT:    br i1 [[C1]], label [[TRUE:%.*]], label [[FALSE:%.*]]
-; IS__TUNIT_OPM:       true:
-; IS__TUNIT_OPM-NEXT:    [[CA:%.*]] = musttail call i8* @side_effects(i8 [[V]])
-; IS__TUNIT_OPM-NEXT:    ret i8* [[CA]]
-; IS__TUNIT_OPM:       false:
-; IS__TUNIT_OPM-NEXT:    [[C2:%.*]] = icmp eq i8 [[V]], 1
-; IS__TUNIT_OPM-NEXT:    br i1 [[C2]], label [[C2_TRUE:%.*]], label [[C2_FALSE:%.*]]
-; IS__TUNIT_OPM:       c2_true:
-; IS__TUNIT_OPM-NEXT:    ret i8* null
-; IS__TUNIT_OPM:       c2_false:
-; IS__TUNIT_OPM-NEXT:    [[CA2:%.*]] = musttail call i8* @dont_zap_me(i8 undef)
-; IS__TUNIT_OPM-NEXT:    ret i8* [[CA2]]
+; IS__TUNIT____-LABEL: define {{[^@]+}}@start
+; IS__TUNIT____-SAME: (i8 [[V:%.*]]) {
+; IS__TUNIT____-NEXT:    [[C1:%.*]] = icmp eq i8 [[V]], 0
+; IS__TUNIT____-NEXT:    br i1 [[C1]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; IS__TUNIT____:       true:
+; IS__TUNIT____-NEXT:    [[CA:%.*]] = musttail call i8* @side_effects(i8 [[V]])
+; IS__TUNIT____-NEXT:    ret i8* [[CA]]
+; IS__TUNIT____:       false:
+; IS__TUNIT____-NEXT:    [[C2:%.*]] = icmp eq i8 [[V]], 1
+; IS__TUNIT____-NEXT:    br i1 [[C2]], label [[C2_TRUE:%.*]], label [[C2_FALSE:%.*]]
+; IS__TUNIT____:       c2_true:
+; IS__TUNIT____-NEXT:    ret i8* null
+; IS__TUNIT____:       c2_false:
+; IS__TUNIT____-NEXT:    [[CA2:%.*]] = musttail call i8* @dont_zap_me(i8 undef)
+; IS__TUNIT____-NEXT:    ret i8* [[CA2]]
 ;
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@start
-; IS__TUNIT_NPM-SAME: (i8 [[V:%.*]]) {
-; IS__TUNIT_NPM-NEXT:    [[C1:%.*]] = icmp eq i8 [[V]], 0
-; IS__TUNIT_NPM-NEXT:    br i1 [[C1]], label [[TRUE:%.*]], label [[FALSE:%.*]]
-; IS__TUNIT_NPM:       true:
-; IS__TUNIT_NPM-NEXT:    [[CA:%.*]] = musttail call i8* @side_effects(i8 undef)
-; IS__TUNIT_NPM-NEXT:    ret i8* [[CA]]
-; IS__TUNIT_NPM:       false:
-; IS__TUNIT_NPM-NEXT:    [[C2:%.*]] = icmp eq i8 [[V]], 1
-; IS__TUNIT_NPM-NEXT:    br i1 [[C2]], label [[C2_TRUE:%.*]], label [[C2_FALSE:%.*]]
-; IS__TUNIT_NPM:       c2_true:
-; IS__TUNIT_NPM-NEXT:    ret i8* null
-; IS__TUNIT_NPM:       c2_false:
-; IS__TUNIT_NPM-NEXT:    [[CA2:%.*]] = musttail call i8* @dont_zap_me(i8 undef)
-; IS__TUNIT_NPM-NEXT:    ret i8* [[CA2]]
-;
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@start
-; IS__CGSCC_OPM-SAME: (i8 [[V:%.*]]) {
-; IS__CGSCC_OPM-NEXT:    [[C1:%.*]] = icmp eq i8 [[V]], 0
-; IS__CGSCC_OPM-NEXT:    br i1 [[C1]], label [[TRUE:%.*]], label [[FALSE:%.*]]
-; IS__CGSCC_OPM:       true:
-; IS__CGSCC_OPM-NEXT:    [[CA:%.*]] = musttail call i8* @side_effects(i8 [[V]])
-; IS__CGSCC_OPM-NEXT:    ret i8* [[CA]]
-; IS__CGSCC_OPM:       false:
-; IS__CGSCC_OPM-NEXT:    [[C2:%.*]] = icmp eq i8 [[V]], 1
-; IS__CGSCC_OPM-NEXT:    br i1 [[C2]], label [[C2_TRUE:%.*]], label [[C2_FALSE:%.*]]
-; IS__CGSCC_OPM:       c2_true:
-; IS__CGSCC_OPM-NEXT:    ret i8* undef
-; IS__CGSCC_OPM:       c2_false:
-; IS__CGSCC_OPM-NEXT:    [[CA2:%.*]] = musttail call i8* @dont_zap_me(i8 undef)
-; IS__CGSCC_OPM-NEXT:    ret i8* [[CA2]]
-;
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@start
-; IS__CGSCC_NPM-SAME: (i8 [[V:%.*]]) {
-; IS__CGSCC_NPM-NEXT:    [[C1:%.*]] = icmp eq i8 [[V]], 0
-; IS__CGSCC_NPM-NEXT:    br i1 [[C1]], label [[TRUE:%.*]], label [[FALSE:%.*]]
-; IS__CGSCC_NPM:       true:
-; IS__CGSCC_NPM-NEXT:    [[CA:%.*]] = musttail call i8* @side_effects(i8 undef)
-; IS__CGSCC_NPM-NEXT:    ret i8* [[CA]]
-; IS__CGSCC_NPM:       false:
-; IS__CGSCC_NPM-NEXT:    [[C2:%.*]] = icmp eq i8 [[V]], 1
-; IS__CGSCC_NPM-NEXT:    br i1 [[C2]], label [[C2_TRUE:%.*]], label [[C2_FALSE:%.*]]
-; IS__CGSCC_NPM:       c2_true:
-; IS__CGSCC_NPM-NEXT:    ret i8* undef
-; IS__CGSCC_NPM:       c2_false:
-; IS__CGSCC_NPM-NEXT:    [[CA2:%.*]] = musttail call i8* @dont_zap_me(i8 undef)
-; IS__CGSCC_NPM-NEXT:    ret i8* [[CA2]]
+; IS__CGSCC____-LABEL: define {{[^@]+}}@start
+; IS__CGSCC____-SAME: (i8 [[V:%.*]]) {
+; IS__CGSCC____-NEXT:    [[C1:%.*]] = icmp eq i8 [[V]], 0
+; IS__CGSCC____-NEXT:    br i1 [[C1]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; IS__CGSCC____:       true:
+; IS__CGSCC____-NEXT:    [[CA:%.*]] = musttail call i8* @side_effects(i8 [[V]])
+; IS__CGSCC____-NEXT:    ret i8* [[CA]]
+; IS__CGSCC____:       false:
+; IS__CGSCC____-NEXT:    [[C2:%.*]] = icmp eq i8 [[V]], 1
+; IS__CGSCC____-NEXT:    br i1 [[C2]], label [[C2_TRUE:%.*]], label [[C2_FALSE:%.*]]
+; IS__CGSCC____:       c2_true:
+; IS__CGSCC____-NEXT:    ret i8* undef
+; IS__CGSCC____:       c2_false:
+; IS__CGSCC____-NEXT:    [[CA2:%.*]] = musttail call i8* @dont_zap_me(i8 undef)
+; IS__CGSCC____-NEXT:    ret i8* [[CA2]]
 ;
   %c1 = icmp eq i8 %v, 0
   br i1 %c1, label %true, label %false
@@ -92,17 +61,11 @@ c2_false:
 }
 
 define internal i8* @side_effects(i8 %v) {
-; IS________OPM-LABEL: define {{[^@]+}}@side_effects
-; IS________OPM-SAME: (i8 [[V:%.*]]) {
-; IS________OPM-NEXT:    [[I1:%.*]] = call i32 @external()
-; IS________OPM-NEXT:    [[CA:%.*]] = musttail call i8* @start(i8 [[V]])
-; IS________OPM-NEXT:    ret i8* [[CA]]
-;
-; IS________NPM-LABEL: define {{[^@]+}}@side_effects
-; IS________NPM-SAME: (i8 [[V:%.*]]) {
-; IS________NPM-NEXT:    [[I1:%.*]] = call i32 @external()
-; IS________NPM-NEXT:    [[CA:%.*]] = musttail call i8* @start(i8 noundef 0)
-; IS________NPM-NEXT:    ret i8* [[CA]]
+; CHECK-LABEL: define {{[^@]+}}@side_effects
+; CHECK-SAME: (i8 [[V:%.*]]) {
+; CHECK-NEXT:    [[I1:%.*]] = call i32 @external()
+; CHECK-NEXT:    [[CA:%.*]] = musttail call i8* @start(i8 [[V]])
+; CHECK-NEXT:    ret i8* [[CA]]
 ;
   %i1 = call i32 @external()
 

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
index a8a866f68400..dd862223a03e 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
@@ -40,7 +40,7 @@ define dso_local i32 @main() {
 ; IS__TUNIT____-NEXT:    [[ALLOC2:%.*]] = alloca i8, align 8
 ; IS__TUNIT____-NEXT:    [[THREAD:%.*]] = alloca i64, align 8
 ; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @foo, i8* noalias nocapture nofree readnone align 536870912 undef)
-; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @bar, i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) undef)
+; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @bar, i8* noalias nofree nonnull readnone align 8 dereferenceable(8) "no-capture-maybe-returned" undef)
 ; IS__TUNIT____-NEXT:    [[CALL2:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @baz, i8* noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(1) [[ALLOC1]])
 ; IS__TUNIT____-NEXT:    [[CALL3:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @buz, i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[ALLOC2]])
 ; IS__TUNIT____-NEXT:    ret i32 0
@@ -51,7 +51,7 @@ define dso_local i32 @main() {
 ; IS__CGSCC____-NEXT:    [[ALLOC2:%.*]] = alloca i8, align 8
 ; IS__CGSCC____-NEXT:    [[THREAD:%.*]] = alloca i64, align 8
 ; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @foo, i8* noalias nocapture nofree noundef readnone align 536870912 null)
-; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @bar, i8* noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(8) bitcast (i8** @GlobalVPtr to i8*))
+; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @bar, i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(8) bitcast (i8** @GlobalVPtr to i8*))
 ; IS__CGSCC____-NEXT:    [[CALL2:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @baz, i8* noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(1) [[ALLOC1]])
 ; IS__CGSCC____-NEXT:    [[CALL3:%.*]] = call i32 @pthread_create(i64* noundef nonnull align 8 dereferenceable(8) [[THREAD]], %union.pthread_attr_t* noalias nocapture noundef align 536870912 null, i8* (i8*)* noundef nonnull @buz, i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" [[ALLOC2]])
 ; IS__CGSCC____-NEXT:    ret i32 0
@@ -72,13 +72,13 @@ declare !callback !0 dso_local i32 @pthread_create(i64*, %union.pthread_attr_t*,
 define internal i8* @foo(i8* %arg) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@foo
-; IS__TUNIT____-SAME: (i8* noalias nocapture nofree readnone align 536870912 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__TUNIT____-SAME: (i8* noalias nofree readnone returned align 536870912 "no-capture-maybe-returned" [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    ret i8* null
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@foo
-; IS__CGSCC____-SAME: (i8* noalias nocapture nofree readnone align 536870912 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__CGSCC____-SAME: (i8* noalias nofree readnone returned align 536870912 "no-capture-maybe-returned" [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    ret i8* null
 ;
@@ -89,13 +89,13 @@ entry:
 define internal i8* @bar(i8* %arg) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@bar
-; IS__TUNIT____-SAME: (i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR0]] {
+; IS__TUNIT____-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(8) "no-capture-maybe-returned" [[ARG:%.*]]) #[[ATTR0]] {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    ret i8* bitcast (i8** @GlobalVPtr to i8*)
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@bar
-; IS__CGSCC____-SAME: (i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(8) "no-capture-maybe-returned" [[ARG:%.*]]) #[[ATTR0]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    ret i8* bitcast (i8** @GlobalVPtr to i8*)
 ;

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll
index d59605ef299f..30dbdd109753 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll
@@ -20,33 +20,19 @@ define internal i32* @incdec(i1 %C, i32* %V) {
 ; IS__TUNIT____-NEXT:    store i32 [[X2]], i32* [[V]], align 4
 ; IS__TUNIT____-NEXT:    ret i32* [[V]]
 ;
-; IS__CGSCC_OPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@incdec
-; IS__CGSCC_OPM-SAME: (i1 [[C:%.*]], i32* noalias nofree noundef nonnull returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) #[[ATTR0:[0-9]+]] {
-; IS__CGSCC_OPM-NEXT:    [[X:%.*]] = load i32, i32* [[V]], align 4
-; IS__CGSCC_OPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
-; IS__CGSCC_OPM:       T:
-; IS__CGSCC_OPM-NEXT:    [[X1:%.*]] = add i32 [[X]], 1
-; IS__CGSCC_OPM-NEXT:    store i32 [[X1]], i32* [[V]], align 4
-; IS__CGSCC_OPM-NEXT:    ret i32* [[V]]
-; IS__CGSCC_OPM:       F:
-; IS__CGSCC_OPM-NEXT:    [[X2:%.*]] = sub i32 [[X]], 1
-; IS__CGSCC_OPM-NEXT:    store i32 [[X2]], i32* [[V]], align 4
-; IS__CGSCC_OPM-NEXT:    ret i32* [[V]]
-;
-; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@incdec
-; IS__CGSCC_NPM-SAME: (i1 [[C:%.*]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) #[[ATTR0:[0-9]+]] {
-; IS__CGSCC_NPM-NEXT:    [[X:%.*]] = load i32, i32* [[V]], align 4
-; IS__CGSCC_NPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
-; IS__CGSCC_NPM:       T:
-; IS__CGSCC_NPM-NEXT:    [[X1:%.*]] = add i32 [[X]], 1
-; IS__CGSCC_NPM-NEXT:    store i32 [[X1]], i32* [[V]], align 4
-; IS__CGSCC_NPM-NEXT:    ret i32* undef
-; IS__CGSCC_NPM:       F:
-; IS__CGSCC_NPM-NEXT:    [[X2:%.*]] = sub i32 [[X]], 1
-; IS__CGSCC_NPM-NEXT:    store i32 [[X2]], i32* [[V]], align 4
-; IS__CGSCC_NPM-NEXT:    ret i32* undef
+; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn
+; IS__CGSCC____-LABEL: define {{[^@]+}}@incdec
+; IS__CGSCC____-SAME: (i1 [[C:%.*]], i32* noalias nofree noundef nonnull returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__CGSCC____-NEXT:    [[X:%.*]] = load i32, i32* [[V]], align 4
+; IS__CGSCC____-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; IS__CGSCC____:       T:
+; IS__CGSCC____-NEXT:    [[X1:%.*]] = add i32 [[X]], 1
+; IS__CGSCC____-NEXT:    store i32 [[X1]], i32* [[V]], align 4
+; IS__CGSCC____-NEXT:    ret i32* [[V]]
+; IS__CGSCC____:       F:
+; IS__CGSCC____-NEXT:    [[X2:%.*]] = sub i32 [[X]], 1
+; IS__CGSCC____-NEXT:    store i32 [[X2]], i32* [[V]], align 4
+; IS__CGSCC____-NEXT:    ret i32* [[V]]
 ;
   %X = load i32, i32* %V
   br i1 %C, label %T, label %F
@@ -88,81 +74,43 @@ define internal { i32, i32 } @foo(i32 %A, i32 %B) {
 }
 
 define void @caller(i1 %C) personality i32 (...)* @__gxx_personality_v0 {
-; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind willreturn
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@caller
-; IS__TUNIT_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] personality i32 (...)* @__gxx_personality_v0 {
-; IS__TUNIT_OPM-NEXT:    [[Q:%.*]] = alloca i32, align 4
-; IS__TUNIT_OPM-NEXT:    [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) #[[ATTR2]]
-; IS__TUNIT_OPM-NEXT:    [[S1:%.*]] = call { i32, i32 } @foo(i32 noundef 1, i32 noundef 2) #[[ATTR1]]
-; IS__TUNIT_OPM-NEXT:    [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0
-; IS__TUNIT_OPM-NEXT:    [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR1]]
-; IS__TUNIT_OPM-NEXT:    br label [[OK:%.*]]
-; IS__TUNIT_OPM:       OK:
-; IS__TUNIT_OPM-NEXT:    [[X2:%.*]] = extractvalue { i32, i32 } [[S2]], 0
-; IS__TUNIT_OPM-NEXT:    [[Z:%.*]] = add i32 [[X1]], [[X2]]
-; IS__TUNIT_OPM-NEXT:    store i32 [[Z]], i32* [[W]], align 4
-; IS__TUNIT_OPM-NEXT:    br label [[RET:%.*]]
-; IS__TUNIT_OPM:       LPAD:
-; IS__TUNIT_OPM-NEXT:    unreachable
-; IS__TUNIT_OPM:       RET:
-; IS__TUNIT_OPM-NEXT:    ret void
-;
-; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@caller
-; IS__TUNIT_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] personality i32 (...)* @__gxx_personality_v0 {
-; IS__TUNIT_NPM-NEXT:    [[Q:%.*]] = alloca i32, align 4
-; IS__TUNIT_NPM-NEXT:    [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) #[[ATTR2]]
-; IS__TUNIT_NPM-NEXT:    [[S1:%.*]] = call { i32, i32 } @foo(i32 noundef 1, i32 noundef 2) #[[ATTR1]]
-; IS__TUNIT_NPM-NEXT:    [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0
-; IS__TUNIT_NPM-NEXT:    [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR1]]
-; IS__TUNIT_NPM-NEXT:    br label [[OK:%.*]]
-; IS__TUNIT_NPM:       OK:
-; IS__TUNIT_NPM-NEXT:    [[X2:%.*]] = extractvalue { i32, i32 } [[S2]], 0
-; IS__TUNIT_NPM-NEXT:    [[Z:%.*]] = add i32 [[X1]], [[X2]]
-; IS__TUNIT_NPM-NEXT:    store i32 [[Z]], i32* [[Q]], align 4
-; IS__TUNIT_NPM-NEXT:    br label [[RET:%.*]]
-; IS__TUNIT_NPM:       LPAD:
-; IS__TUNIT_NPM-NEXT:    unreachable
-; IS__TUNIT_NPM:       RET:
-; IS__TUNIT_NPM-NEXT:    ret void
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn
+; IS__TUNIT____-LABEL: define {{[^@]+}}@caller
+; IS__TUNIT____-SAME: (i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] personality i32 (...)* @__gxx_personality_v0 {
+; IS__TUNIT____-NEXT:    [[Q:%.*]] = alloca i32, align 4
+; IS__TUNIT____-NEXT:    [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) #[[ATTR2]]
+; IS__TUNIT____-NEXT:    [[S1:%.*]] = call { i32, i32 } @foo(i32 noundef 1, i32 noundef 2) #[[ATTR1]]
+; IS__TUNIT____-NEXT:    [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0
+; IS__TUNIT____-NEXT:    [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR1]]
+; IS__TUNIT____-NEXT:    br label [[OK:%.*]]
+; IS__TUNIT____:       OK:
+; IS__TUNIT____-NEXT:    [[X2:%.*]] = extractvalue { i32, i32 } [[S2]], 0
+; IS__TUNIT____-NEXT:    [[Z:%.*]] = add i32 [[X1]], [[X2]]
+; IS__TUNIT____-NEXT:    store i32 [[Z]], i32* [[W]], align 4
+; IS__TUNIT____-NEXT:    br label [[RET:%.*]]
+; IS__TUNIT____:       LPAD:
+; IS__TUNIT____-NEXT:    unreachable
+; IS__TUNIT____:       RET:
+; IS__TUNIT____-NEXT:    ret void
 ;
-; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@caller
-; IS__CGSCC_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR1]] personality i32 (...)* @__gxx_personality_v0 {
-; IS__CGSCC_OPM-NEXT:    [[Q:%.*]] = alloca i32, align 4
-; IS__CGSCC_OPM-NEXT:    [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) #[[ATTR2:[0-9]+]]
-; IS__CGSCC_OPM-NEXT:    [[S1:%.*]] = call { i32, i32 } @foo(i32 noundef 1, i32 noundef 2) #[[ATTR3:[0-9]+]]
-; IS__CGSCC_OPM-NEXT:    [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0
-; IS__CGSCC_OPM-NEXT:    [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR4:[0-9]+]]
-; IS__CGSCC_OPM-NEXT:    br label [[OK:%.*]]
-; IS__CGSCC_OPM:       OK:
-; IS__CGSCC_OPM-NEXT:    [[X2:%.*]] = extractvalue { i32, i32 } [[S2]], 0
-; IS__CGSCC_OPM-NEXT:    [[Z:%.*]] = add i32 [[X1]], [[X2]]
-; IS__CGSCC_OPM-NEXT:    store i32 [[Z]], i32* [[W]], align 4
-; IS__CGSCC_OPM-NEXT:    br label [[RET:%.*]]
-; IS__CGSCC_OPM:       LPAD:
-; IS__CGSCC_OPM-NEXT:    unreachable
-; IS__CGSCC_OPM:       RET:
-; IS__CGSCC_OPM-NEXT:    ret void
-;
-; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@caller
-; IS__CGSCC_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR1]] personality i32 (...)* @__gxx_personality_v0 {
-; IS__CGSCC_NPM-NEXT:    [[Q:%.*]] = alloca i32, align 4
-; IS__CGSCC_NPM-NEXT:    [[W:%.*]] = call i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) #[[ATTR2:[0-9]+]]
-; IS__CGSCC_NPM-NEXT:    [[S1:%.*]] = call { i32, i32 } @foo(i32 noundef 1, i32 noundef 2) #[[ATTR3:[0-9]+]]
-; IS__CGSCC_NPM-NEXT:    [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0
-; IS__CGSCC_NPM-NEXT:    [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR4:[0-9]+]]
-; IS__CGSCC_NPM-NEXT:    br label [[OK:%.*]]
-; IS__CGSCC_NPM:       OK:
-; IS__CGSCC_NPM-NEXT:    [[X2:%.*]] = extractvalue { i32, i32 } [[S2]], 0
-; IS__CGSCC_NPM-NEXT:    [[Z:%.*]] = add i32 [[X1]], [[X2]]
-; IS__CGSCC_NPM-NEXT:    store i32 [[Z]], i32* [[Q]], align 4
-; IS__CGSCC_NPM-NEXT:    br label [[RET:%.*]]
-; IS__CGSCC_NPM:       LPAD:
-; IS__CGSCC_NPM-NEXT:    unreachable
-; IS__CGSCC_NPM:       RET:
-; IS__CGSCC_NPM-NEXT:    ret void
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC____-LABEL: define {{[^@]+}}@caller
+; IS__CGSCC____-SAME: (i1 [[C:%.*]]) #[[ATTR1]] personality i32 (...)* @__gxx_personality_v0 {
+; IS__CGSCC____-NEXT:    [[Q:%.*]] = alloca i32, align 4
+; IS__CGSCC____-NEXT:    [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) #[[ATTR2:[0-9]+]]
+; IS__CGSCC____-NEXT:    [[S1:%.*]] = call { i32, i32 } @foo(i32 noundef 1, i32 noundef 2) #[[ATTR3:[0-9]+]]
+; IS__CGSCC____-NEXT:    [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0
+; IS__CGSCC____-NEXT:    [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR4:[0-9]+]]
+; IS__CGSCC____-NEXT:    br label [[OK:%.*]]
+; IS__CGSCC____:       OK:
+; IS__CGSCC____-NEXT:    [[X2:%.*]] = extractvalue { i32, i32 } [[S2]], 0
+; IS__CGSCC____-NEXT:    [[Z:%.*]] = add i32 [[X1]], [[X2]]
+; IS__CGSCC____-NEXT:    store i32 [[Z]], i32* [[W]], align 4
+; IS__CGSCC____-NEXT:    br label [[RET:%.*]]
+; IS__CGSCC____:       LPAD:
+; IS__CGSCC____-NEXT:    unreachable
+; IS__CGSCC____:       RET:
+; IS__CGSCC____-NEXT:    ret void
 ;
   %Q = alloca i32
   ;; Call incdec to see if %W is properly replaced by %Q
@@ -194,11 +142,11 @@ declare i32 @__gxx_personality_v0(...)
 ;.
 ; IS__TUNIT____: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind willreturn }
 ; IS__TUNIT____: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
-; IS__TUNIT____: attributes #[[ATTR2:[0-9]+]] = { nofree nosync nounwind willreturn }
+; IS__TUNIT____: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn }
 ;.
-; IS__CGSCC____: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree norecurse nosync nounwind willreturn }
+; IS__CGSCC____: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn }
 ; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn }
-; IS__CGSCC____: attributes #[[ATTR2:[0-9]+]] = { nounwind willreturn }
-; IS__CGSCC____: attributes #[[ATTR3:[0-9]+]] = { readnone willreturn }
-; IS__CGSCC____: attributes #[[ATTR4:[0-9]+]] = { nounwind readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR2]] = { nounwind willreturn }
+; IS__CGSCC____: attributes #[[ATTR3]] = { readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR4]] = { nounwind readnone willreturn }
 ;.

diff  --git a/llvm/test/Transforms/Attributor/align.ll b/llvm/test/Transforms/Attributor/align.ll
index 9078819fcee4..ade07f487ad8 100644
--- a/llvm/test/Transforms/Attributor/align.ll
+++ b/llvm/test/Transforms/Attributor/align.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 
@@ -108,14 +108,22 @@ define i32* @test5_2() {
 ; TEST 6
 ; SCC
 define i32* @test6_1() #0 {
-; NOT_CGSCC_NPM: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable willreturn
-; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test6_1
-; NOT_CGSCC_NPM-SAME: () #[[ATTR1:[0-9]+]] {
-; NOT_CGSCC_NPM-NEXT:    unreachable
+; IS__TUNIT____: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test6_1
+; IS__TUNIT____-SAME: () #[[ATTR1:[0-9]+]] {
+; IS__TUNIT____-NEXT:    [[RET:%.*]] = tail call i32* @test6_2() #[[ATTR11:[0-9]+]]
+; IS__TUNIT____-NEXT:    unreachable
+;
+; IS__CGSCC_OPM: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test6_1
+; IS__CGSCC_OPM-SAME: () #[[ATTR1:[0-9]+]] {
+; IS__CGSCC_OPM-NEXT:    [[RET:%.*]] = tail call i32* @test6_2() #[[ATTR13:[0-9]+]]
+; IS__CGSCC_OPM-NEXT:    unreachable
 ;
-; IS__CGSCC_NPM: Function Attrs: nofree noinline norecurse noreturn nosync nounwind readnone uwtable willreturn
+; IS__CGSCC_NPM: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test6_1
 ; IS__CGSCC_NPM-SAME: () #[[ATTR1:[0-9]+]] {
+; IS__CGSCC_NPM-NEXT:    [[RET:%.*]] = tail call i32* @test6_2() #[[ATTR12:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    unreachable
 ;
   %ret = tail call i32* @test6_2()
@@ -123,14 +131,22 @@ define i32* @test6_1() #0 {
 }
 
 define i32* @test6_2() #0 {
-; NOT_CGSCC_NPM: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable willreturn
-; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test6_2
-; NOT_CGSCC_NPM-SAME: () #[[ATTR1]] {
-; NOT_CGSCC_NPM-NEXT:    unreachable
+; IS__TUNIT____: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test6_2
+; IS__TUNIT____-SAME: () #[[ATTR1]] {
+; IS__TUNIT____-NEXT:    [[RET:%.*]] = tail call i32* @test6_1() #[[ATTR11]]
+; IS__TUNIT____-NEXT:    unreachable
+;
+; IS__CGSCC_OPM: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test6_2
+; IS__CGSCC_OPM-SAME: () #[[ATTR1]] {
+; IS__CGSCC_OPM-NEXT:    [[RET:%.*]] = tail call i32* @test6_1() #[[ATTR13]]
+; IS__CGSCC_OPM-NEXT:    unreachable
 ;
-; IS__CGSCC_NPM: Function Attrs: nofree noinline norecurse noreturn nosync nounwind readnone uwtable willreturn
+; IS__CGSCC_NPM: Function Attrs: nofree noinline noreturn nosync nounwind readnone uwtable
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test6_2
 ; IS__CGSCC_NPM-SAME: () #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[RET:%.*]] = tail call i32* @test6_1() #[[ATTR12]]
 ; IS__CGSCC_NPM-NEXT:    unreachable
 ;
   %ret = tail call i32* @test6_1()
@@ -157,14 +173,23 @@ define i32* @test6_2() #0 {
 
 ; Function Attrs: nounwind readnone ssp uwtable
 define internal i8* @f1(i8* readnone %0) local_unnamed_addr #0 {
+; IS__TUNIT____: Function Attrs: nofree noinline nosync nounwind readnone uwtable willreturn
+; IS__TUNIT____-LABEL: define {{[^@]+}}@f1
+; IS__TUNIT____-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; IS__TUNIT____-NEXT:    br label [[TMP3:%.*]]
+; IS__TUNIT____:       2:
+; IS__TUNIT____-NEXT:    unreachable
+; IS__TUNIT____:       3:
+; IS__TUNIT____-NEXT:    ret i8* [[TMP0]]
+;
 ; IS__CGSCC_OPM: Function Attrs: nofree noinline nosync nounwind readnone uwtable willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f1
-; IS__CGSCC_OPM-SAME: (i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(1) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+; IS__CGSCC_OPM-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
 ; IS__CGSCC_OPM-NEXT:    br label [[TMP3:%.*]]
 ; IS__CGSCC_OPM:       2:
 ; IS__CGSCC_OPM-NEXT:    unreachable
 ; IS__CGSCC_OPM:       3:
-; IS__CGSCC_OPM-NEXT:    ret i8* undef
+; IS__CGSCC_OPM-NEXT:    ret i8* @a1
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable willreturn
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f1
@@ -195,22 +220,24 @@ define internal i8* @f2(i8* readnone %0) local_unnamed_addr #0 {
 ; IS__CGSCC_OPM-SAME: (i8* nonnull readnone [[TMP0:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
 ; IS__CGSCC_OPM-NEXT:    br label [[TMP2:%.*]]
 ; IS__CGSCC_OPM:       2:
-; IS__CGSCC_OPM-NEXT:    br label [[TMP4:%.*]]
-; IS__CGSCC_OPM:       3:
-; IS__CGSCC_OPM-NEXT:    unreachable
+; IS__CGSCC_OPM-NEXT:    [[TMP3:%.*]] = tail call i8* @f1(i8* noalias nonnull readnone align 536870912 dereferenceable(4294967295) undef)
+; IS__CGSCC_OPM-NEXT:    br label [[TMP5:%.*]]
 ; IS__CGSCC_OPM:       4:
-; IS__CGSCC_OPM-NEXT:    ret i8* undef
+; IS__CGSCC_OPM-NEXT:    unreachable
+; IS__CGSCC_OPM:       5:
+; IS__CGSCC_OPM-NEXT:    ret i8* [[TMP3]]
 ;
 ; IS__CGSCC_NPM: Function Attrs: noinline nounwind uwtable
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f2
 ; IS__CGSCC_NPM-SAME: (i8* nonnull readnone [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
 ; IS__CGSCC_NPM-NEXT:    br label [[TMP2:%.*]]
 ; IS__CGSCC_NPM:       2:
-; IS__CGSCC_NPM-NEXT:    br label [[TMP4:%.*]]
-; IS__CGSCC_NPM:       3:
-; IS__CGSCC_NPM-NEXT:    unreachable
+; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = tail call i8* @f1()
+; IS__CGSCC_NPM-NEXT:    br label [[TMP5:%.*]]
 ; IS__CGSCC_NPM:       4:
-; IS__CGSCC_NPM-NEXT:    ret i8* undef
+; IS__CGSCC_NPM-NEXT:    unreachable
+; IS__CGSCC_NPM:       5:
+; IS__CGSCC_NPM-NEXT:    ret i8* @a1
 ;
   %2 = icmp eq i8* %0, null
   br i1 %2, label %5, label %3
@@ -267,7 +294,8 @@ define align 4 i8* @test7() #0 {
 ; IS__TUNIT____: Function Attrs: nofree noinline nosync nounwind readnone uwtable willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@test7
 ; IS__TUNIT____-SAME: () #[[ATTR0]] {
-; IS__TUNIT____-NEXT:    ret i8* @a1
+; IS__TUNIT____-NEXT:    [[C:%.*]] = tail call i8* @f1(i8* noalias nofree noundef nonnull readnone align 8 dereferenceable(1) "no-capture-maybe-returned" @a1) #[[ATTR9:[0-9]+]]
+; IS__TUNIT____-NEXT:    ret i8* [[C]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test7
@@ -283,12 +311,12 @@ define align 4 i8* @test7() #0 {
 define internal i8* @f1b(i8* readnone %0) local_unnamed_addr #0 {
 ; IS__CGSCC_OPM: Function Attrs: nofree noinline nosync nounwind readnone uwtable willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f1b
-; IS__CGSCC_OPM-SAME: (i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(1) [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; IS__CGSCC_OPM-SAME: (i8* noalias nofree nonnull readnone returned align 8 dereferenceable(1) "no-capture-maybe-returned" [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2]] {
 ; IS__CGSCC_OPM-NEXT:    br label [[TMP3:%.*]]
 ; IS__CGSCC_OPM:       2:
 ; IS__CGSCC_OPM-NEXT:    unreachable
 ; IS__CGSCC_OPM:       3:
-; IS__CGSCC_OPM-NEXT:    ret i8* undef
+; IS__CGSCC_OPM-NEXT:    ret i8* @a1
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable willreturn
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f1b
@@ -321,22 +349,24 @@ define internal i8* @f2b(i8* readnone %0) local_unnamed_addr #0 {
 ; IS__CGSCC_OPM-SAME: (i8* nonnull readnone [[TMP0:%.*]]) local_unnamed_addr #[[ATTR3]] {
 ; IS__CGSCC_OPM-NEXT:    br label [[TMP2:%.*]]
 ; IS__CGSCC_OPM:       2:
-; IS__CGSCC_OPM-NEXT:    br label [[TMP4:%.*]]
-; IS__CGSCC_OPM:       3:
-; IS__CGSCC_OPM-NEXT:    unreachable
+; IS__CGSCC_OPM-NEXT:    [[TMP3:%.*]] = tail call i8* @f1b(i8* noalias nonnull readnone align 536870912 dereferenceable(4294967295) undef)
+; IS__CGSCC_OPM-NEXT:    br label [[TMP5:%.*]]
 ; IS__CGSCC_OPM:       4:
-; IS__CGSCC_OPM-NEXT:    ret i8* undef
+; IS__CGSCC_OPM-NEXT:    unreachable
+; IS__CGSCC_OPM:       5:
+; IS__CGSCC_OPM-NEXT:    ret i8* [[TMP3]]
 ;
 ; IS__CGSCC_NPM: Function Attrs: noinline nounwind uwtable
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f2b
 ; IS__CGSCC_NPM-SAME: (i8* nonnull readnone [[TMP0:%.*]]) local_unnamed_addr #[[ATTR2]] {
 ; IS__CGSCC_NPM-NEXT:    br label [[TMP2:%.*]]
 ; IS__CGSCC_NPM:       2:
-; IS__CGSCC_NPM-NEXT:    br label [[TMP4:%.*]]
-; IS__CGSCC_NPM:       3:
-; IS__CGSCC_NPM-NEXT:    unreachable
+; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = tail call i8* @f1b()
+; IS__CGSCC_NPM-NEXT:    br label [[TMP5:%.*]]
 ; IS__CGSCC_NPM:       4:
-; IS__CGSCC_NPM-NEXT:    ret i8* undef
+; IS__CGSCC_NPM-NEXT:    unreachable
+; IS__CGSCC_NPM:       5:
+; IS__CGSCC_NPM-NEXT:    ret i8* @a1
 ;
   %2 = icmp eq i8* %0, null
   br i1 %2, label %5, label %3
@@ -1090,7 +1120,7 @@ end:
 define i64 @ptr2int(i32* %p) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@ptr2int
-; IS__TUNIT____-SAME: (i32* nofree readnone [[P:%.*]]) #[[ATTR9:[0-9]+]] {
+; IS__TUNIT____-SAME: (i32* nofree readnone [[P:%.*]]) #[[ATTR9]] {
 ; IS__TUNIT____-NEXT:    [[P2I:%.*]] = ptrtoint i32* [[P]] to i64
 ; IS__TUNIT____-NEXT:    ret i64 [[P2I]]
 ;
@@ -1239,7 +1269,7 @@ define i32 @musttail_caller_1(i32* %p) {
 ; IS__CGSCC_OPM-NEXT:    [[C:%.*]] = load i1, i1* @cnd, align 1
 ; IS__CGSCC_OPM-NEXT:    br i1 [[C]], label [[MT:%.*]], label [[EXIT:%.*]]
 ; IS__CGSCC_OPM:       mt:
-; IS__CGSCC_OPM-NEXT:    [[V:%.*]] = musttail call i32 @musttail_callee_1(i32* nocapture nofree noundef nonnull readonly dereferenceable(4) [[P]]) #[[ATTR13:[0-9]+]]
+; IS__CGSCC_OPM-NEXT:    [[V:%.*]] = musttail call i32 @musttail_callee_1(i32* nocapture nofree noundef nonnull readonly dereferenceable(4) [[P]]) #[[ATTR14:[0-9]+]]
 ; IS__CGSCC_OPM-NEXT:    ret i32 [[V]]
 ; IS__CGSCC_OPM:       exit:
 ; IS__CGSCC_OPM-NEXT:    ret i32 0
@@ -1250,7 +1280,7 @@ define i32 @musttail_caller_1(i32* %p) {
 ; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = load i1, i1* @cnd, align 1
 ; IS__CGSCC_NPM-NEXT:    br i1 [[C]], label [[MT:%.*]], label [[EXIT:%.*]]
 ; IS__CGSCC_NPM:       mt:
-; IS__CGSCC_NPM-NEXT:    [[V:%.*]] = musttail call i32 @musttail_callee_1(i32* nocapture nofree noundef nonnull readonly dereferenceable(4) [[P]]) #[[ATTR12:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[V:%.*]] = musttail call i32 @musttail_callee_1(i32* nocapture nofree noundef nonnull readonly dereferenceable(4) [[P]]) #[[ATTR13:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[V]]
 ; IS__CGSCC_NPM:       exit:
 ; IS__CGSCC_NPM-NEXT:    ret i32 0
@@ -1348,7 +1378,7 @@ attributes #1 = { uwtable noinline }
 attributes #2 = { null_pointer_is_valid }
 ;.
 ; IS__TUNIT____: attributes #[[ATTR0]] = { nofree noinline nosync nounwind readnone uwtable willreturn }
-; IS__TUNIT____: attributes #[[ATTR1]] = { nofree noinline noreturn nosync nounwind readnone uwtable willreturn }
+; IS__TUNIT____: attributes #[[ATTR1]] = { nofree noinline noreturn nosync nounwind readnone uwtable }
 ; IS__TUNIT____: attributes #[[ATTR2]] = { nounwind }
 ; IS__TUNIT____: attributes #[[ATTR3]] = { nofree nosync nounwind }
 ; IS__TUNIT____: attributes #[[ATTR4]] = { argmemonly nofree nosync nounwind readonly willreturn }
@@ -1358,9 +1388,10 @@ attributes #2 = { null_pointer_is_valid }
 ; IS__TUNIT____: attributes #[[ATTR8]] = { nofree nosync nounwind willreturn writeonly }
 ; IS__TUNIT____: attributes #[[ATTR9]] = { nofree nosync nounwind readnone willreturn }
 ; IS__TUNIT____: attributes #[[ATTR10]] = { nofree nosync nounwind readonly willreturn }
+; IS__TUNIT____: attributes #[[ATTR11]] = { nofree noreturn nosync nounwind readnone }
 ;.
 ; IS__CGSCC_OPM: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone uwtable willreturn }
-; IS__CGSCC_OPM: attributes #[[ATTR1]] = { nofree noinline noreturn nosync nounwind readnone uwtable willreturn }
+; IS__CGSCC_OPM: attributes #[[ATTR1]] = { nofree noinline noreturn nosync nounwind readnone uwtable }
 ; IS__CGSCC_OPM: attributes #[[ATTR2]] = { nofree noinline nosync nounwind readnone uwtable willreturn }
 ; IS__CGSCC_OPM: attributes #[[ATTR3]] = { noinline nounwind uwtable }
 ; IS__CGSCC_OPM: attributes #[[ATTR4]] = { nounwind }
@@ -1372,10 +1403,11 @@ attributes #2 = { null_pointer_is_valid }
 ; IS__CGSCC_OPM: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind willreturn writeonly }
 ; IS__CGSCC_OPM: attributes #[[ATTR11]] = { nofree norecurse nosync nounwind readnone willreturn }
 ; IS__CGSCC_OPM: attributes #[[ATTR12]] = { nofree norecurse nosync nounwind readonly willreturn }
-; IS__CGSCC_OPM: attributes #[[ATTR13]] = { readonly willreturn }
+; IS__CGSCC_OPM: attributes #[[ATTR13]] = { nofree noreturn nosync nounwind readnone }
+; IS__CGSCC_OPM: attributes #[[ATTR14]] = { readonly willreturn }
 ;.
 ; IS__CGSCC_NPM: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone uwtable willreturn }
-; IS__CGSCC_NPM: attributes #[[ATTR1]] = { nofree noinline norecurse noreturn nosync nounwind readnone uwtable willreturn }
+; IS__CGSCC_NPM: attributes #[[ATTR1]] = { nofree noinline noreturn nosync nounwind readnone uwtable }
 ; IS__CGSCC_NPM: attributes #[[ATTR2]] = { noinline nounwind uwtable }
 ; IS__CGSCC_NPM: attributes #[[ATTR3]] = { nounwind }
 ; IS__CGSCC_NPM: attributes #[[ATTR4]] = { nofree nosync nounwind }
@@ -1386,5 +1418,6 @@ attributes #2 = { null_pointer_is_valid }
 ; IS__CGSCC_NPM: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind willreturn writeonly }
 ; IS__CGSCC_NPM: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind readnone willreturn }
 ; IS__CGSCC_NPM: attributes #[[ATTR11]] = { nofree norecurse nosync nounwind readonly willreturn }
-; IS__CGSCC_NPM: attributes #[[ATTR12]] = { readonly willreturn }
+; IS__CGSCC_NPM: attributes #[[ATTR12]] = { nofree noreturn nosync nounwind readnone }
+; IS__CGSCC_NPM: attributes #[[ATTR13]] = { readonly willreturn }
 ;.

diff  --git a/llvm/test/Transforms/Attributor/cb_liveness_disabled.ll b/llvm/test/Transforms/Attributor/cb_liveness_disabled.ll
index 748f2bb2ed38..b954fa84e858 100644
--- a/llvm/test/Transforms/Attributor/cb_liveness_disabled.ll
+++ b/llvm/test/Transforms/Attributor/cb_liveness_disabled.ll
@@ -68,10 +68,10 @@ define i32 @test(i32 %0, i32 %1) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP6:%.*]]
 ; CHECK:       4:
-; CHECK-NEXT:    [[TMP5:%.*]] = call noundef i32 @test_range1(i32 [[TMP0]]) #[[ATTR1:[0-9]+]], !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @test_range1(i32 [[TMP0]]) #[[ATTR1:[0-9]+]], !range [[RNG0:![0-9]+]]
 ; CHECK-NEXT:    br label [[TMP8:%.*]]
 ; CHECK:       6:
-; CHECK-NEXT:    [[TMP7:%.*]] = call noundef i32 @test_range2(i32 [[TMP0]]) #[[ATTR1]], !range [[RNG1:![0-9]+]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @test_range2(i32 [[TMP0]]) #[[ATTR1]], !range [[RNG1:![0-9]+]]
 ; CHECK-NEXT:    br label [[TMP8]]
 ; CHECK:       8:
 ; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ [[TMP5]], [[TMP4]] ], [ [[TMP7]], [[TMP6]] ]

diff  --git a/llvm/test/Transforms/Attributor/cb_liveness_enabled.ll b/llvm/test/Transforms/Attributor/cb_liveness_enabled.ll
index c5ed7b0278eb..ef214cb11cea 100644
--- a/llvm/test/Transforms/Attributor/cb_liveness_enabled.ll
+++ b/llvm/test/Transforms/Attributor/cb_liveness_enabled.ll
@@ -68,10 +68,10 @@ define i32 @test(i32 %0, i32 %1) #0 {
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP6:%.*]]
 ; CHECK:       4:
-; CHECK-NEXT:    [[TMP5:%.*]] = call noundef i32 @test_range1(i32 [[TMP0]]) #[[ATTR1:[0-9]+]], !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @test_range1(i32 [[TMP0]]) #[[ATTR1:[0-9]+]], !range [[RNG0:![0-9]+]]
 ; CHECK-NEXT:    br label [[TMP8:%.*]]
 ; CHECK:       6:
-; CHECK-NEXT:    [[TMP7:%.*]] = call noundef i32 @test_range2(i32 [[TMP0]]) #[[ATTR1]], !range [[RNG1:![0-9]+]]
+; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @test_range2(i32 [[TMP0]]) #[[ATTR1]], !range [[RNG1:![0-9]+]]
 ; CHECK-NEXT:    br label [[TMP8]]
 ; CHECK:       8:
 ; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ [[TMP5]], [[TMP4]] ], [ [[TMP7]], [[TMP6]] ]
@@ -94,12 +94,16 @@ define i32 @test(i32 %0, i32 %1) #0 {
 }
 
 define i32 @test_pcheck1(i32 %0) #0 {
-; CHECK-LABEL: define {{[^@]+}}@test_pcheck1
-; CHECK-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @test(i32 [[TMP0]], i32 noundef 1) #[[ATTR1]], !range [[RNG2:![0-9]+]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 101
-; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
-; CHECK-NEXT:    ret i32 [[TMP4]]
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test_pcheck1
+; IS__TUNIT____-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0]] {
+; IS__TUNIT____-NEXT:    ret i32 1
+;
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test_pcheck1
+; IS__CGSCC____-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-NEXT:    [[TMP2:%.*]] = call i32 @test(i32 [[TMP0]], i32 noundef 1) #[[ATTR1]], !range [[RNG2:![0-9]+]]
+; IS__CGSCC____-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP2]], 101
+; IS__CGSCC____-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; IS__CGSCC____-NEXT:    ret i32 [[TMP4]]
 ;
 ; IS__TUNIT_____ENABLED-LABEL: define {{[^@]+}}@test_pcheck1
 ; IS__TUNIT_____ENABLED-SAME: (i32 [[TMP0:%.*]])
@@ -111,12 +115,16 @@ define i32 @test_pcheck1(i32 %0) #0 {
 }
 
 define i32 @test_pcheck2(i32 %0) #0 {
-; CHECK-LABEL: define {{[^@]+}}@test_pcheck2
-; CHECK-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @test(i32 [[TMP0]], i32 noundef 0) #[[ATTR1]], !range [[RNG2]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 99
-; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
-; CHECK-NEXT:    ret i32 [[TMP4]]
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test_pcheck2
+; IS__TUNIT____-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0]] {
+; IS__TUNIT____-NEXT:    ret i32 1
+;
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test_pcheck2
+; IS__CGSCC____-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-NEXT:    [[TMP2:%.*]] = call i32 @test(i32 [[TMP0]], i32 noundef 0) #[[ATTR1]], !range [[RNG2]]
+; IS__CGSCC____-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 99
+; IS__CGSCC____-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; IS__CGSCC____-NEXT:    ret i32 [[TMP4]]
 ;
   %2 = call i32 @test(i32 %0, i32 0)
   %3 = icmp sgt i32 %2, 99
@@ -125,12 +133,19 @@ define i32 @test_pcheck2(i32 %0) #0 {
 }
 
 define i32 @test_ncheck1(i32 %0) #0 {
-; CHECK-LABEL: define {{[^@]+}}@test_ncheck1
-; CHECK-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @test(i32 [[TMP0]], i32 noundef 1) #[[ATTR1]], !range [[RNG2]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 50
-; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
-; CHECK-NEXT:    ret i32 [[TMP4]]
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test_ncheck1
+; IS__TUNIT____-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0]] {
+; IS__TUNIT____-NEXT:    [[TMP2:%.*]] = call i32 @test(i32 [[TMP0]], i32 noundef 1) #[[ATTR1]], !range [[RNG0]]
+; IS__TUNIT____-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 50
+; IS__TUNIT____-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; IS__TUNIT____-NEXT:    ret i32 [[TMP4]]
+;
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test_ncheck1
+; IS__CGSCC____-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-NEXT:    [[TMP2:%.*]] = call i32 @test(i32 [[TMP0]], i32 noundef 1) #[[ATTR1]], !range [[RNG2]]
+; IS__CGSCC____-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 50
+; IS__CGSCC____-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; IS__CGSCC____-NEXT:    ret i32 [[TMP4]]
 ;
   %2 = call i32 @test(i32 %0, i32 1)
   %3 = icmp sgt i32 %2, 50
@@ -139,12 +154,19 @@ define i32 @test_ncheck1(i32 %0) #0 {
 }
 
 define i32 @test_ncheck2(i32 %0) #0 {
-; CHECK-LABEL: define {{[^@]+}}@test_ncheck2
-; CHECK-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @test(i32 [[TMP0]], i32 noundef 0) #[[ATTR1]], !range [[RNG2]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 150
-; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
-; CHECK-NEXT:    ret i32 [[TMP4]]
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test_ncheck2
+; IS__TUNIT____-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0]] {
+; IS__TUNIT____-NEXT:    [[TMP2:%.*]] = call i32 @test(i32 [[TMP0]], i32 noundef 0) #[[ATTR1]], !range [[RNG1]]
+; IS__TUNIT____-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 150
+; IS__TUNIT____-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; IS__TUNIT____-NEXT:    ret i32 [[TMP4]]
+;
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test_ncheck2
+; IS__CGSCC____-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-NEXT:    [[TMP2:%.*]] = call i32 @test(i32 [[TMP0]], i32 noundef 0) #[[ATTR1]], !range [[RNG2]]
+; IS__CGSCC____-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 150
+; IS__CGSCC____-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; IS__CGSCC____-NEXT:    ret i32 [[TMP4]]
 ;
   %2 = call i32 @test(i32 %0, i32 0)
   %3 = icmp sgt i32 %2, 150
@@ -163,7 +185,10 @@ attributes #0 = { noinline nounwind sspstrong uwtable}
 ; IS__CGSCC____: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone sspstrong uwtable willreturn }
 ; IS__CGSCC____: attributes #[[ATTR1]] = { readnone willreturn }
 ;.
-; CHECK: [[RNG0]] = !{i32 0, i32 101}
-; CHECK: [[RNG1]] = !{i32 100, i32 201}
-; CHECK: [[RNG2]] = !{i32 0, i32 201}
+; IS__TUNIT____: [[RNG0]] = !{i32 0, i32 101}
+; IS__TUNIT____: [[RNG1]] = !{i32 100, i32 201}
+;.
+; IS__CGSCC____: [[RNG0]] = !{i32 0, i32 101}
+; IS__CGSCC____: [[RNG1]] = !{i32 100, i32 201}
+; IS__CGSCC____: [[RNG2]] = !{i32 0, i32 201}
 ;.

diff  --git a/llvm/test/Transforms/Attributor/cb_range_enabled.ll b/llvm/test/Transforms/Attributor/cb_range_enabled.ll
index ce28144bd183..49515d63d0ac 100644
--- a/llvm/test/Transforms/Attributor/cb_range_enabled.ll
+++ b/llvm/test/Transforms/Attributor/cb_range_enabled.ll
@@ -64,9 +64,19 @@ define i32 @test2(i32 %unknown, i32 %b) {
 ;        we need to look into this again. For the purpose of making some progress we take this regression
 ;        for now, call site contexts are not on by default anyway (yet).
 define i32 @test1_pcheck(i32 %unknown) {
-; CHECK-LABEL: define {{[^@]+}}@test1_pcheck
-; CHECK-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    ret i32 1
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test1_pcheck
+; IS__TUNIT____-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR0]] {
+; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = call i32 @test1(i32 [[UNKNOWN]], i32 noundef 20) #[[ATTR0]], !range [[RNG1:![0-9]+]]
+; IS__TUNIT____-NEXT:    [[TMP2:%.*]] = icmp sle i32 [[TMP1]], 90
+; IS__TUNIT____-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; IS__TUNIT____-NEXT:    ret i32 [[TMP3]]
+;
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test1_pcheck
+; IS__CGSCC____-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = call i32 @test1(i32 [[UNKNOWN]], i32 noundef 20) #[[ATTR1]], !range [[RNG1:![0-9]+]]
+; IS__CGSCC____-NEXT:    [[TMP2:%.*]] = icmp sle i32 [[TMP1]], 90
+; IS__CGSCC____-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; IS__CGSCC____-NEXT:    ret i32 [[TMP3]]
 ;
   %1 = call i32 @test1(i32 %unknown, i32 20)
   %2 = icmp sle i32 %1, 90
@@ -75,9 +85,19 @@ define i32 @test1_pcheck(i32 %unknown) {
 }
 
 define i32 @test2_pcheck(i32 %unknown) {
-; CHECK-LABEL: define {{[^@]+}}@test2_pcheck
-; CHECK-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    ret i32 1
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test2_pcheck
+; IS__TUNIT____-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR0]] {
+; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = call i32 @test2(i32 [[UNKNOWN]], i32 noundef 20) #[[ATTR0]], !range [[RNG2:![0-9]+]]
+; IS__TUNIT____-NEXT:    [[TMP2:%.*]] = icmp sge i32 [[TMP1]], 20
+; IS__TUNIT____-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; IS__TUNIT____-NEXT:    ret i32 [[TMP3]]
+;
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test2_pcheck
+; IS__CGSCC____-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = call i32 @test2(i32 [[UNKNOWN]], i32 noundef 20) #[[ATTR1]], !range [[RNG2:![0-9]+]]
+; IS__CGSCC____-NEXT:    [[TMP2:%.*]] = icmp sge i32 [[TMP1]], 20
+; IS__CGSCC____-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+; IS__CGSCC____-NEXT:    ret i32 [[TMP3]]
 ;
   %1 = call i32 @test2(i32 %unknown, i32 20)
   %2 = icmp sge i32 %1, 20
@@ -90,14 +110,14 @@ define i32 @test2_pcheck(i32 %unknown) {
 define i32 @test1_ncheck(i32 %unknown) {
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@test1_ncheck
 ; IS__TUNIT____-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR0]] {
-; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = call i32 @test1(i32 [[UNKNOWN]], i32 noundef 20) #[[ATTR0]], !range [[RNG1:![0-9]+]]
+; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = call i32 @test1(i32 [[UNKNOWN]], i32 noundef 20) #[[ATTR0]], !range [[RNG1]]
 ; IS__TUNIT____-NEXT:    [[TMP2:%.*]] = icmp sle i32 [[TMP1]], 10
 ; IS__TUNIT____-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
 ; IS__TUNIT____-NEXT:    ret i32 [[TMP3]]
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test1_ncheck
 ; IS__CGSCC____-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR0]] {
-; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = call i32 @test1(i32 [[UNKNOWN]], i32 noundef 20) #[[ATTR1]], !range [[RNG1:![0-9]+]]
+; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = call i32 @test1(i32 [[UNKNOWN]], i32 noundef 20) #[[ATTR1]], !range [[RNG1]]
 ; IS__CGSCC____-NEXT:    [[TMP2:%.*]] = icmp sle i32 [[TMP1]], 10
 ; IS__CGSCC____-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
 ; IS__CGSCC____-NEXT:    ret i32 [[TMP3]]
@@ -111,14 +131,14 @@ define i32 @test1_ncheck(i32 %unknown) {
 define i32 @test2_ncheck(i32 %unknown) {
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@test2_ncheck
 ; IS__TUNIT____-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR0]] {
-; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = call i32 @test2(i32 [[UNKNOWN]], i32 noundef 20) #[[ATTR0]], !range [[RNG2:![0-9]+]]
+; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = call i32 @test2(i32 [[UNKNOWN]], i32 noundef 20) #[[ATTR0]], !range [[RNG2]]
 ; IS__TUNIT____-NEXT:    [[TMP2:%.*]] = icmp sge i32 [[TMP1]], 30
 ; IS__TUNIT____-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
 ; IS__TUNIT____-NEXT:    ret i32 [[TMP3]]
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test2_ncheck
 ; IS__CGSCC____-SAME: (i32 [[UNKNOWN:%.*]]) #[[ATTR0]] {
-; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = call i32 @test2(i32 [[UNKNOWN]], i32 noundef 20) #[[ATTR1]], !range [[RNG2:![0-9]+]]
+; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = call i32 @test2(i32 [[UNKNOWN]], i32 noundef 20) #[[ATTR1]], !range [[RNG2]]
 ; IS__CGSCC____-NEXT:    [[TMP2:%.*]] = icmp sge i32 [[TMP1]], 30
 ; IS__CGSCC____-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
 ; IS__CGSCC____-NEXT:    ret i32 [[TMP3]]

diff  --git a/llvm/test/Transforms/Attributor/cgscc_bugs.ll b/llvm/test/Transforms/Attributor/cgscc_bugs.ll
index 98cb3596f4b3..1f88a12b1bf6 100644
--- a/llvm/test/Transforms/Attributor/cgscc_bugs.ll
+++ b/llvm/test/Transforms/Attributor/cgscc_bugs.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=12 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=12 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=13 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=13 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 

diff  --git a/llvm/test/Transforms/Attributor/depgraph.ll b/llvm/test/Transforms/Attributor/depgraph.ll
index fa9fc6c06caa..f4186308f891 100644
--- a/llvm/test/Transforms/Attributor/depgraph.ll
+++ b/llvm/test/Transforms/Attributor/depgraph.ll
@@ -53,32 +53,30 @@ define i32* @checkAndAdvance(i32* align 16 %0) {
 
 ; GRAPH:      [AAIsDead] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state Live[#BB 4/4][#TBEP 0][#KDE 1]
 ; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAValueSimplify] for CtxI '  %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state not-simple
+; GRAPH-NEXT: [AAValueSimplify] for CtxI '  %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state simplified
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAWillReturn] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state may-noreturn
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAUndefinedBehavior] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state undefined-behavior
 ; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAValueSimplify] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state not-simple
+; GRAPH-NEXT: [AAValueSimplify] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state simplified
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAIsDead] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance at -1]} with state assumed-live
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AANoUndef] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance at -1]} with state may-undef-or-poison
 ; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAReturnedValues] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state returns(#3)
+; GRAPH-NEXT: [AAReturnedValues] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state returns(#3)[#UC: 1]
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AANoUnwind] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state nounwind
 ; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
 ; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
 ; GRAPH-NEXT:   updates [AANoUnwind] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind
 ; GRAPH-NEXT:   updates [AANoUnwind] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind
-; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH-NEXT:   updates [AANoUnwind] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind
 ; GRAPH-NEXT:   updates [AANoUnwind] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind
 ; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
 ; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
 ; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AANoSync] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state nosync
 ; GRAPH-NEXT:   updates [AANoSync] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nosync
@@ -86,7 +84,7 @@ define i32* @checkAndAdvance(i32* align 16 %0) {
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAIsDead] for CtxI '  %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state assumed-live
 ; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAValueSimplify] for CtxI '  %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state not-simple
+; GRAPH-NEXT: [AAValueSimplify] for CtxI '  %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state simplified
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAValueConstantRange] for CtxI '  %2 = load i32, i32* %0, align 4' at position {flt: [@-1]} with state range(32)<full-set / full-set>
 ; GRAPH-EMPTY:
@@ -94,6 +92,8 @@ define i32* @checkAndAdvance(i32* align 16 %0) {
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAIsDead] for CtxI '  %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state assumed-live
 ; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAIsDead] for CtxI '  br i1 %3, label %4, label %7' at position {flt: [@-1]} with state assumed-live
+; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AANoFree] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state nofree
 ; GRAPH-NEXT:   updates [AANoFree] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree
 ; GRAPH-NEXT:   updates [AANoFree] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state nofree
@@ -105,27 +105,28 @@ define i32* @checkAndAdvance(i32* align 16 %0) {
 ; GRAPH-NEXT: [AANoRecurse] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state may-recurse
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAMemoryBehavior] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state readonly
-; GRAPH-NEXT:   updates [AAMemoryBehavior] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
-; GRAPH-NEXT:   updates [AAMemoryBehavior] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
-; GRAPH-NEXT:   updates [AAMemoryBehavior] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly
 ; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH-NEXT:   updates [AAMemoryBehavior] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
 ; GRAPH-NEXT:   updates [AAMemoryBehavior] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly
 ; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT:   updates [AAMemoryBehavior] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly
 ; GRAPH-NEXT:   updates [AAMemoryBehavior] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
 ; GRAPH-NEXT:   updates [AAMemoryBehavior] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
 ; GRAPH-NEXT:   updates [AAMemoryBehavior] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly
+; GRAPH-NEXT:   updates [AAMemoryBehavior] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
+; GRAPH-NEXT:   updates [AAMemoryBehavior] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
 ; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
 ; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
+; GRAPH-NEXT:   updates [AAMemoryBehavior] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state readonly
+; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
 ; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAMemoryLocation] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state memory:argument
 ; GRAPH-NEXT:   updates [AAMemoryLocation] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
 ; GRAPH-NEXT:   updates [AAMemoryLocation] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
 ; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAHeapToStack] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state [H2S] Mallocs Good/Bad: 0/0
+; GRAPH-NEXT: [AAHeapToStack] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state [H2S] Mallocs Good/Bad: 0/1
 ; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAValueSimplify] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance at -1]} with state not-simple
+; GRAPH-NEXT: [AAValueSimplify] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance at -1]} with state simplified
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAAlign] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance at -1]} with state align<1-16>
 ; GRAPH-NEXT:   updates [AAAlign] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16>
@@ -174,15 +175,11 @@ define i32* @checkAndAdvance(i32* align 16 %0) {
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AANoUnwind] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nounwind
 ; GRAPH-NEXT:   updates [AAIsDead] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live
-; GRAPH-NEXT:   updates [AAIsDead] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live
 ; GRAPH-NEXT:   updates [AANoUnwind] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state nounwind
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAMemoryBehavior] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state readonly
-; GRAPH-NEXT:   updates [AAIsDead] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state assumed-live
 ; GRAPH-NEXT:   updates [AAMemoryBehavior] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state readonly
 ; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAValueSimplify] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state not-simple
-; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAIsDead] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed-live
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAValueSimplify] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state simplified
@@ -192,7 +189,6 @@ define i32* @checkAndAdvance(i32* align 16 %0) {
 ; GRAPH-NEXT: [AANoCapture] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state assumed not-captured-maybe-returned
 ; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
 ; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
-; GRAPH-NEXT:   updates [AANoCapture] for CtxI '  %2 = load i32, i32* %0, align 4' at position {arg: [@0]} with state assumed not-captured-maybe-returned
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AANoAlias] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state may-alias
 ; GRAPH-EMPTY:
@@ -205,20 +201,22 @@ define i32* @checkAndAdvance(i32* align 16 %0) {
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAValueConstantRange] for CtxI '  %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state range(1)<full-set / full-set>
 ; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAValueSimplify] for CtxI <<null inst>> at position {flt: [@-1]} with state not-simple
+; GRAPH-NEXT: [AAValueSimplify] for CtxI <<null inst>> at position {flt: [@-1]} with state simplified
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAValueConstantRange] for CtxI <<null inst>> at position {flt: [@-1]} with state range(32)<[0,1) / [0,1)>
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAPotentialValues] for CtxI '  %3 = icmp eq i32 %2, 0' at position {flt: [@-1]} with state set-state(< {full-set} >)
 ; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAIsDead] for CtxI '  br i1 %3, label %4, label %7' at position {flt: [@-1]} with state assumed-live
-; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AANoReturn] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state may-return
 ; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAValueSimplify] for CtxI '  %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state not-simple
+; GRAPH-NEXT: [AANoAlias] for CtxI '  %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state may-alias
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAValueSimplify] for CtxI '  %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state simplified
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AANoUndef] for CtxI '  %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state may-undef-or-poison
 ; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAValueSimplify] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state simplified
+; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAAlign] for CtxI '  %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state align<16-16>
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AANonNull] for CtxI '  %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state nonnull
@@ -226,20 +224,14 @@ define i32* @checkAndAdvance(i32* align 16 %0) {
 ; GRAPH-NEXT:   updates [AANonNull] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_arg: [@0]} with state nonnull
 ; GRAPH-NEXT:   updates [AANonNull] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance at -1]} with state nonnull
 ; GRAPH-NEXT:   updates [AANonNull] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance at -1]} with state nonnull
-; GRAPH-NEXT:   updates [AANonNull] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance at -1]} with state nonnull
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAIsDead] for CtxI '  ret i32* %.0' at position {flt: [@-1]} with state assumed-live
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAIsDead] for CtxI '  br label %8' at position {flt: [@-1]} with state assumed-live
 ; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAIsDead] for CtxI '  %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state assumed-live
-; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAIsDead] for CtxI '  br label %8' at position {flt: [@-1]} with state assumed-live
 ; GRAPH-EMPTY:
-; GRAPH-NEXT: [AANoAlias] for CtxI '  %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state may-alias
-; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAMemoryLocation] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
-; GRAPH-NEXT:   updates [AAMemoryLocation] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state memory:argument
+; GRAPH-NEXT: [AAIsDead] for CtxI '  %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state assumed-live
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AAWillReturn] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state may-noreturn
 ; GRAPH-EMPTY:
@@ -251,13 +243,18 @@ define i32* @checkAndAdvance(i32* align 16 %0) {
 ; GRAPH-NEXT: [AANoFree] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state nofree
 ; GRAPH-NEXT:   updates [AANoFree] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state nofree
 ; GRAPH-EMPTY:
-; GRAPH-NEXT:  [AADereferenceable] for CtxI '  %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state unknown-dereferenceable
+; GRAPH-NEXT: [AAMemoryLocation] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs: [@-1]} with state memory:argument
+; GRAPH-NEXT:   updates [AAMemoryLocation] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn:checkAndAdvance [checkAndAdvance at -1]} with state memory:argument
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AAAlign] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16>
+; GRAPH-NEXT:   updates [AAAlign] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance at -1]} with state align<1-16>
+; GRAPH-EMPTY:
+; GRAPH-NEXT: [AADereferenceable] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state unknown-dereferenceable
 ; GRAPH-EMPTY:
 ; GRAPH-NEXT: [AANonNull] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state nonnull
 ; GRAPH-NEXT:   updates [AANonNull] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance at -1]} with state nonnull
 ; GRAPH-EMPTY:
-; GRAPH-NEXT: [AAAlign] for CtxI '  %6 = call i32* @checkAndAdvance(i32* %5)' at position {cs_ret: [@-1]} with state align<1-16>
-; GRAPH-NEXT:   updates [AAAlign] for CtxI '  %2 = load i32, i32* %0, align 4' at position {fn_ret:checkAndAdvance [checkAndAdvance at -1]} with state align<1-16>
+; GRAPH-NEXT: [AADereferenceable] for CtxI '  %5 = getelementptr inbounds i32, i32* %0, i64 4' at position {flt: [@-1]} with state unknown-dereferenceable
 
 ; GRAPH-NOT: update
 

diff  --git a/llvm/test/Transforms/Attributor/dereferenceable-2-inseltpoison.ll b/llvm/test/Transforms/Attributor/dereferenceable-2-inseltpoison.ll
index c649b17f59ea..0e222f6ad1ee 100644
--- a/llvm/test/Transforms/Attributor/dereferenceable-2-inseltpoison.ll
+++ b/llvm/test/Transforms/Attributor/dereferenceable-2-inseltpoison.ll
@@ -299,13 +299,15 @@ define void @volatile_is_not_dereferenceable(i16* %ptr) {
 ; IS__TUNIT____: Function Attrs: argmemonly nofree nounwind willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@volatile_is_not_dereferenceable
 ; IS__TUNIT____-SAME: (i16* nofree align 2 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
-; IS__TUNIT____-NEXT:    [[T0:%.*]] = load volatile i16, i16* [[PTR]], align 2
+; IS__TUNIT____-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr i16, i16* [[PTR]], i64 0
+; IS__TUNIT____-NEXT:    [[T0:%.*]] = load volatile i16, i16* [[ARRAYIDX0]], align 2
 ; IS__TUNIT____-NEXT:    ret void
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@volatile_is_not_dereferenceable
 ; IS__CGSCC____-SAME: (i16* nofree align 2 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
-; IS__CGSCC____-NEXT:    [[T0:%.*]] = load volatile i16, i16* [[PTR]], align 2
+; IS__CGSCC____-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr i16, i16* [[PTR]], i64 0
+; IS__CGSCC____-NEXT:    [[T0:%.*]] = load volatile i16, i16* [[ARRAYIDX0]], align 2
 ; IS__CGSCC____-NEXT:    ret void
 ;
   %arrayidx0 = getelementptr i16, i16* %ptr, i64 0

diff  --git a/llvm/test/Transforms/Attributor/dereferenceable-2.ll b/llvm/test/Transforms/Attributor/dereferenceable-2.ll
index ab407c3a84eb..4b682521812f 100644
--- a/llvm/test/Transforms/Attributor/dereferenceable-2.ll
+++ b/llvm/test/Transforms/Attributor/dereferenceable-2.ll
@@ -299,13 +299,15 @@ define void @volatile_is_not_dereferenceable(i16* %ptr) {
 ; IS__TUNIT____: Function Attrs: argmemonly nofree nounwind willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@volatile_is_not_dereferenceable
 ; IS__TUNIT____-SAME: (i16* nofree align 2 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
-; IS__TUNIT____-NEXT:    [[T0:%.*]] = load volatile i16, i16* [[PTR]], align 2
+; IS__TUNIT____-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr i16, i16* [[PTR]], i64 0
+; IS__TUNIT____-NEXT:    [[T0:%.*]] = load volatile i16, i16* [[ARRAYIDX0]], align 2
 ; IS__TUNIT____-NEXT:    ret void
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@volatile_is_not_dereferenceable
 ; IS__CGSCC____-SAME: (i16* nofree align 2 [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
-; IS__CGSCC____-NEXT:    [[T0:%.*]] = load volatile i16, i16* [[PTR]], align 2
+; IS__CGSCC____-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr i16, i16* [[PTR]], i64 0
+; IS__CGSCC____-NEXT:    [[T0:%.*]] = load volatile i16, i16* [[ARRAYIDX0]], align 2
 ; IS__CGSCC____-NEXT:    ret void
 ;
   %arrayidx0 = getelementptr i16, i16* %ptr, i64 0

diff  --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll
index fb4c841ada29..e202f5952bfd 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll
@@ -57,21 +57,23 @@ define void @h2s_value_simplify_interaction(i1 %c, i8* %A) {
 ; IS________NPM-LABEL: define {{[^@]+}}@h2s_value_simplify_interaction
 ; IS________NPM-SAME: (i1 [[C:%.*]], i8* nocapture nofree [[A:%.*]]) {
 ; IS________NPM-NEXT:  entry:
-; IS________NPM-NEXT:    [[TMP0:%.*]] = alloca i8, i64 4, align 1
+; IS________NPM-NEXT:    [[M:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
 ; IS________NPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS________NPM:       t:
 ; IS________NPM-NEXT:    br i1 false, label [[DEAD:%.*]], label [[F2:%.*]]
 ; IS________NPM:       f:
 ; IS________NPM-NEXT:    br label [[J:%.*]]
 ; IS________NPM:       f2:
-; IS________NPM-NEXT:    [[L:%.*]] = load i8, i8* [[TMP0]], align 1
+; IS________NPM-NEXT:    [[C1:%.*]] = bitcast i8* [[M]] to i32*
+; IS________NPM-NEXT:    [[C2:%.*]] = bitcast i32* [[C1]] to i8*
+; IS________NPM-NEXT:    [[L:%.*]] = load i8, i8* [[C2]], align 1
 ; IS________NPM-NEXT:    call void @usei8(i8 [[L]])
-; IS________NPM-NEXT:    call void @no_sync_func(i8* nocapture nofree noundef [[TMP0]]) #[[ATTR6:[0-9]+]]
+; IS________NPM-NEXT:    call void @no_sync_func(i8* nocapture nofree noundef [[C2]]) #[[ATTR6:[0-9]+]]
 ; IS________NPM-NEXT:    br label [[J]]
 ; IS________NPM:       dead:
 ; IS________NPM-NEXT:    unreachable
 ; IS________NPM:       j:
-; IS________NPM-NEXT:    [[PHI:%.*]] = phi i8* [ [[TMP0]], [[F]] ], [ null, [[F2]] ]
+; IS________NPM-NEXT:    [[PHI:%.*]] = phi i8* [ [[M]], [[F]] ], [ null, [[F2]] ]
 ; IS________NPM-NEXT:    tail call void @no_sync_func(i8* nocapture nofree noundef [[PHI]]) #[[ATTR6]]
 ; IS________NPM-NEXT:    ret void
 ;
@@ -569,8 +571,9 @@ define i32 @irreducible_cfg(i32 %0) {
 ; IS________NPM-NEXT:    [[TMP14]] = add nsw i32 [[DOT1]], 1
 ; IS________NPM-NEXT:    br label [[TMP8]]
 ; IS________NPM:       15:
-; IS________NPM-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP3]], align 4
-; IS________NPM-NEXT:    ret i32 [[TMP16]]
+; IS________NPM-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP3]] to i8*
+; IS________NPM-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP3]], align 4
+; IS________NPM-NEXT:    ret i32 [[TMP17]]
 ;
   %2 = call noalias i8* @malloc(i64 4)
   %3 = bitcast i8* %2 to i32*

diff  --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
index cb74ed43cd73..e6a8d2510a37 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
@@ -377,68 +377,37 @@ define void @test11() {
 
 ; TEST 12
 define i32 @irreducible_cfg(i32 %0) {
-; IS________OPM-LABEL: define {{[^@]+}}@irreducible_cfg
-; IS________OPM-SAME: (i32 [[TMP0:%.*]]) {
-; IS________OPM-NEXT:    [[TMP2:%.*]] = call noalias i8* @malloc(i64 noundef 4)
-; IS________OPM-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-; IS________OPM-NEXT:    store i32 10, i32* [[TMP3]], align 4
-; IS________OPM-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP0]], 1
-; IS________OPM-NEXT:    br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP7:%.*]]
-; IS________OPM:       5:
-; IS________OPM-NEXT:    [[TMP6:%.*]] = add nsw i32 [[TMP0]], 5
-; IS________OPM-NEXT:    br label [[TMP13:%.*]]
-; IS________OPM:       7:
-; IS________OPM-NEXT:    br label [[TMP8:%.*]]
-; IS________OPM:       8:
-; IS________OPM-NEXT:    [[DOT0:%.*]] = phi i32 [ [[TMP14:%.*]], [[TMP13]] ], [ 1, [[TMP7]] ]
-; IS________OPM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4
-; IS________OPM-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
-; IS________OPM-NEXT:    store i32 [[TMP10]], i32* [[TMP3]], align 4
-; IS________OPM-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[TMP9]], 0
-; IS________OPM-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP15:%.*]]
-; IS________OPM:       12:
-; IS________OPM-NEXT:    br label [[TMP13]]
-; IS________OPM:       13:
-; IS________OPM-NEXT:    [[DOT1:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[DOT0]], [[TMP12]] ]
-; IS________OPM-NEXT:    [[TMP14]] = add nsw i32 [[DOT1]], 1
-; IS________OPM-NEXT:    br label [[TMP8]]
-; IS________OPM:       15:
-; IS________OPM-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP3]], align 4
-; IS________OPM-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP3]] to i8*
-; IS________OPM-NEXT:    call void @free(i8* nocapture noundef [[TMP17]])
-; IS________OPM-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP3]], align 4
-; IS________OPM-NEXT:    ret i32 [[TMP18]]
-;
-; IS________NPM-LABEL: define {{[^@]+}}@irreducible_cfg
-; IS________NPM-SAME: (i32 [[TMP0:%.*]]) {
-; IS________NPM-NEXT:    [[TMP2:%.*]] = call noalias i8* @malloc(i64 noundef 4)
-; IS________NPM-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-; IS________NPM-NEXT:    store i32 10, i32* [[TMP3]], align 4
-; IS________NPM-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP0]], 1
-; IS________NPM-NEXT:    br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP7:%.*]]
-; IS________NPM:       5:
-; IS________NPM-NEXT:    [[TMP6:%.*]] = add nsw i32 [[TMP0]], 5
-; IS________NPM-NEXT:    br label [[TMP13:%.*]]
-; IS________NPM:       7:
-; IS________NPM-NEXT:    br label [[TMP8:%.*]]
-; IS________NPM:       8:
-; IS________NPM-NEXT:    [[DOT0:%.*]] = phi i32 [ [[TMP14:%.*]], [[TMP13]] ], [ 1, [[TMP7]] ]
-; IS________NPM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4
-; IS________NPM-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
-; IS________NPM-NEXT:    store i32 [[TMP10]], i32* [[TMP3]], align 4
-; IS________NPM-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[TMP9]], 0
-; IS________NPM-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP15:%.*]]
-; IS________NPM:       12:
-; IS________NPM-NEXT:    br label [[TMP13]]
-; IS________NPM:       13:
-; IS________NPM-NEXT:    [[DOT1:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[DOT0]], [[TMP12]] ]
-; IS________NPM-NEXT:    [[TMP14]] = add nsw i32 [[DOT1]], 1
-; IS________NPM-NEXT:    br label [[TMP8]]
-; IS________NPM:       15:
-; IS________NPM-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP3]], align 4
-; IS________NPM-NEXT:    call void @free(i8* nocapture noundef [[TMP2]])
-; IS________NPM-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP3]], align 4
-; IS________NPM-NEXT:    ret i32 [[TMP17]]
+; CHECK-LABEL: define {{[^@]+}}@irreducible_cfg
+; CHECK-SAME: (i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[TMP2:%.*]] = call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
+; CHECK-NEXT:    store i32 10, i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[TMP0]], 1
+; CHECK-NEXT:    br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP7:%.*]]
+; CHECK:       5:
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw i32 [[TMP0]], 5
+; CHECK-NEXT:    br label [[TMP13:%.*]]
+; CHECK:       7:
+; CHECK-NEXT:    br label [[TMP8:%.*]]
+; CHECK:       8:
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ [[TMP14:%.*]], [[TMP13]] ], [ 1, [[TMP7]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
+; CHECK-NEXT:    store i32 [[TMP10]], i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[TMP9]], 0
+; CHECK-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP15:%.*]]
+; CHECK:       12:
+; CHECK-NEXT:    br label [[TMP13]]
+; CHECK:       13:
+; CHECK-NEXT:    [[DOT1:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[DOT0]], [[TMP12]] ]
+; CHECK-NEXT:    [[TMP14]] = add nsw i32 [[DOT1]], 1
+; CHECK-NEXT:    br label [[TMP8]]
+; CHECK:       15:
+; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP3]] to i8*
+; CHECK-NEXT:    call void @free(i8* nocapture noundef [[TMP17]])
+; CHECK-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP3]], align 4
+; CHECK-NEXT:    ret i32 [[TMP18]]
 ;
   %2 = call noalias i8* @malloc(i64 4)
   %3 = bitcast i8* %2 to i32*

diff  --git a/llvm/test/Transforms/Attributor/internal-noalias.ll b/llvm/test/Transforms/Attributor/internal-noalias.ll
index 7ad94c187677..484aa14cc4e3 100644
--- a/llvm/test/Transforms/Attributor/internal-noalias.ll
+++ b/llvm/test/Transforms/Attributor/internal-noalias.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 

diff  --git a/llvm/test/Transforms/Attributor/internalize.ll b/llvm/test/Transforms/Attributor/internalize.ll
index edb5cb7599e6..651c841622a9 100644
--- a/llvm/test/Transforms/Attributor/internalize.ll
+++ b/llvm/test/Transforms/Attributor/internalize.ll
@@ -8,8 +8,8 @@
 
 ; Deep Wrapper enabled
 
-; RUN: opt -attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=13 -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM,CHECK_ENABLED,NOT_CGSCC_NPM_ENABLED,NOT_CGSCC_OPM_ENABLED,NOT_TUNIT_NPM_ENABLED,IS__TUNIT_____ENABLED,IS________OPM_ENABLED,IS__TUNIT_OPM_ENABLED
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=13 -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM,CHECK_ENABLED,NOT_CGSCC_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,IS__TUNIT_____ENABLED,IS________NPM_ENABLED,IS__TUNIT_NPM_ENABLED
+; RUN: opt -attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM,CHECK_ENABLED,NOT_CGSCC_NPM_ENABLED,NOT_CGSCC_OPM_ENABLED,NOT_TUNIT_NPM_ENABLED,IS__TUNIT_____ENABLED,IS________OPM_ENABLED,IS__TUNIT_OPM_ENABLED
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM,CHECK_ENABLED,NOT_CGSCC_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,IS__TUNIT_____ENABLED,IS________NPM_ENABLED,IS__TUNIT_NPM_ENABLED
 ; RUN: opt -attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_NPM_ENABLED,IS__CGSCC_____ENABLED,IS________OPM_ENABLED,IS__CGSCC_OPM_ENABLED
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -attributor-allow-deep-wrappers -disable-inlining -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM,CHECK_ENABLED,NOT_TUNIT_NPM_ENABLED,NOT_TUNIT_OPM_ENABLED,NOT_CGSCC_OPM_ENABLED,IS__CGSCC_____ENABLED,IS________NPM_ENABLED,IS__CGSCC_NPM_ENABLED
 

diff  --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll
index e37b886cd781..0cf0cbcdc59e 100644
--- a/llvm/test/Transforms/Attributor/liveness.ll
+++ b/llvm/test/Transforms/Attributor/liveness.ll
@@ -2310,7 +2310,7 @@ define void @call_via_pointer_with_dead_args(i32* %a, i32* %b, void (i32*, i32*,
 ; FIXME: We have to prevent the propagation of %fp in the new pm CGSCC pass until the CallGraphUpdater can handle the new call edge.
 define internal void @call_via_pointer_with_dead_args_internal_a(i32* %a, i32* %b, void (i32*, i32*, i32*, i64, i32**)* %fp) {
 ; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_internal_a
-; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], i32* noundef nonnull align 128 dereferenceable(4) [[B:%.*]]) {
+; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], i32* noundef nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull [[FP:%.*]]) {
 ; NOT_CGSCC_NPM-NEXT:    call void @called_via_pointer(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[B]], i32* [[A]], i64 -1, i32** null)
 ; NOT_CGSCC_NPM-NEXT:    ret void
 ;
@@ -2324,7 +2324,7 @@ define internal void @call_via_pointer_with_dead_args_internal_a(i32* %a, i32* %
 }
 define internal void @call_via_pointer_with_dead_args_internal_b(i32* %a, i32* %b, void (i32*, i32*, i32*, i64, i32**)* %fp) {
 ; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_internal_b
-; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], i32* noundef nonnull align 128 dereferenceable(4) [[B:%.*]]) {
+; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], i32* noundef nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull [[FP:%.*]]) {
 ; NOT_CGSCC_NPM-NEXT:    call void @called_via_pointer_internal_2(i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[B]], i32* [[A]], i64 -1, i32** null)
 ; NOT_CGSCC_NPM-NEXT:    ret void
 ;
@@ -2345,8 +2345,8 @@ define void @call_via_pointer_with_dead_args_caller(i32* %a, i32* %b) {
 ; NOT_CGSCC_NPM-NEXT:    [[PTR4:%.*]] = alloca i32, align 128
 ; NOT_CGSCC_NPM-NEXT:    call void @call_via_pointer_with_dead_args(i32* [[A]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR1]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef @called_via_pointer)
 ; NOT_CGSCC_NPM-NEXT:    call void @call_via_pointer_with_dead_args(i32* [[A]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR2]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef @called_via_pointer_internal_1)
-; NOT_CGSCC_NPM-NEXT:    call void @call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR3]])
-; NOT_CGSCC_NPM-NEXT:    call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR4]])
+; NOT_CGSCC_NPM-NEXT:    call void @call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR3]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef @called_via_pointer)
+; NOT_CGSCC_NPM-NEXT:    call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR4]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef @called_via_pointer_internal_2)
 ; NOT_CGSCC_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_caller
@@ -2434,7 +2434,7 @@ define internal void @dead_with_blockaddress_users(i32* nocapture %pc) nounwind
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@dead_with_blockaddress_users
-; IS__CGSCC____-SAME: () #[[ATTR14:[0-9]+]] {
+; IS__CGSCC____-SAME: (i32* noalias nocapture nofree nonnull readonly align 536870912 dereferenceable(4294967295) [[PC:%.*]]) #[[ATTR14:[0-9]+]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    br label [[INDIRECTGOTO:%.*]]
 ; IS__CGSCC____:       lab0:

diff  --git a/llvm/test/Transforms/Attributor/lvi-for-ashr.ll b/llvm/test/Transforms/Attributor/lvi-for-ashr.ll
index f1884f04e700..51416e06bf9f 100644
--- a/llvm/test/Transforms/Attributor/lvi-for-ashr.ll
+++ b/llvm/test/Transforms/Attributor/lvi-for-ashr.ll
@@ -7,87 +7,47 @@
 ; FIXME: DOT should be replaced with 3
 
 define i32 @test-ashr(i32 %c) {
-; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test-ashr
-; IS__TUNIT_OPM-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
-; IS__TUNIT_OPM-NEXT:  chk65:
-; IS__TUNIT_OPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[C]], 65
-; IS__TUNIT_OPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[CHK0:%.*]]
-; IS__TUNIT_OPM:       chk0:
-; IS__TUNIT_OPM-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[C]], 0
-; IS__TUNIT_OPM-NEXT:    br i1 [[CMP]], label [[RETURN]], label [[BB_IF:%.*]]
-; IS__TUNIT_OPM:       bb_if:
-; IS__TUNIT_OPM-NEXT:    [[ASHR_VAL:%.*]] = ashr exact i32 [[C]], 2
-; IS__TUNIT_OPM-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[ASHR_VAL]], 15
-; IS__TUNIT_OPM-NEXT:    br i1 [[CMP2]], label [[BB_THEN:%.*]], label [[RETURN]]
-; IS__TUNIT_OPM:       bb_then:
-; IS__TUNIT_OPM-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[ASHR_VAL]], 16
-; IS__TUNIT_OPM-NEXT:    [[DOT:%.*]] = select i1 [[CMP3]], i32 3, i32 2
-; IS__TUNIT_OPM-NEXT:    br label [[RETURN]]
-; IS__TUNIT_OPM:       return:
-; IS__TUNIT_OPM-NEXT:    [[RETVAL:%.*]] = phi i32 [ 0, [[CHK65:%.*]] ], [ 1, [[CHK0]] ], [ [[DOT]], [[BB_THEN]] ], [ 4, [[BB_IF]] ]
-; IS__TUNIT_OPM-NEXT:    ret i32 [[RETVAL]]
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test-ashr
+; IS__TUNIT____-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__TUNIT____-NEXT:  chk65:
+; IS__TUNIT____-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[C]], 65
+; IS__TUNIT____-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[CHK0:%.*]]
+; IS__TUNIT____:       chk0:
+; IS__TUNIT____-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[C]], 0
+; IS__TUNIT____-NEXT:    br i1 [[CMP]], label [[RETURN]], label [[BB_IF:%.*]]
+; IS__TUNIT____:       bb_if:
+; IS__TUNIT____-NEXT:    [[ASHR_VAL:%.*]] = ashr exact i32 [[C]], 2
+; IS__TUNIT____-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[ASHR_VAL]], 15
+; IS__TUNIT____-NEXT:    br i1 [[CMP2]], label [[BB_THEN:%.*]], label [[RETURN]]
+; IS__TUNIT____:       bb_then:
+; IS__TUNIT____-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[ASHR_VAL]], 16
+; IS__TUNIT____-NEXT:    [[DOT:%.*]] = select i1 [[CMP3]], i32 3, i32 2
+; IS__TUNIT____-NEXT:    br label [[RETURN]]
+; IS__TUNIT____:       return:
+; IS__TUNIT____-NEXT:    [[RETVAL:%.*]] = phi i32 [ 0, [[CHK65:%.*]] ], [ 1, [[CHK0]] ], [ [[DOT]], [[BB_THEN]] ], [ 4, [[BB_IF]] ]
+; IS__TUNIT____-NEXT:    ret i32 [[RETVAL]]
 ;
-; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test-ashr
-; IS__TUNIT_NPM-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
-; IS__TUNIT_NPM-NEXT:  chk65:
-; IS__TUNIT_NPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[C]], 65
-; IS__TUNIT_NPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[CHK0:%.*]]
-; IS__TUNIT_NPM:       chk0:
-; IS__TUNIT_NPM-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[C]], 0
-; IS__TUNIT_NPM-NEXT:    br i1 [[CMP]], label [[RETURN]], label [[BB_IF:%.*]]
-; IS__TUNIT_NPM:       bb_if:
-; IS__TUNIT_NPM-NEXT:    [[ASHR_VAL:%.*]] = ashr exact i32 [[C]], 2
-; IS__TUNIT_NPM-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[ASHR_VAL]], 15
-; IS__TUNIT_NPM-NEXT:    br i1 [[CMP2]], label [[BB_THEN:%.*]], label [[RETURN]]
-; IS__TUNIT_NPM:       bb_then:
-; IS__TUNIT_NPM-NEXT:    [[DOT:%.*]] = select i1 true, i32 3, i32 2
-; IS__TUNIT_NPM-NEXT:    br label [[RETURN]]
-; IS__TUNIT_NPM:       return:
-; IS__TUNIT_NPM-NEXT:    [[RETVAL:%.*]] = phi i32 [ 0, [[CHK65:%.*]] ], [ 1, [[CHK0]] ], [ [[DOT]], [[BB_THEN]] ], [ 4, [[BB_IF]] ]
-; IS__TUNIT_NPM-NEXT:    ret i32 [[RETVAL]]
-;
-; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test-ashr
-; IS__CGSCC_OPM-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
-; IS__CGSCC_OPM-NEXT:  chk65:
-; IS__CGSCC_OPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[C]], 65
-; IS__CGSCC_OPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[CHK0:%.*]]
-; IS__CGSCC_OPM:       chk0:
-; IS__CGSCC_OPM-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[C]], 0
-; IS__CGSCC_OPM-NEXT:    br i1 [[CMP]], label [[RETURN]], label [[BB_IF:%.*]]
-; IS__CGSCC_OPM:       bb_if:
-; IS__CGSCC_OPM-NEXT:    [[ASHR_VAL:%.*]] = ashr exact i32 [[C]], 2
-; IS__CGSCC_OPM-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[ASHR_VAL]], 15
-; IS__CGSCC_OPM-NEXT:    br i1 [[CMP2]], label [[BB_THEN:%.*]], label [[RETURN]]
-; IS__CGSCC_OPM:       bb_then:
-; IS__CGSCC_OPM-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[ASHR_VAL]], 16
-; IS__CGSCC_OPM-NEXT:    [[DOT:%.*]] = select i1 [[CMP3]], i32 3, i32 2
-; IS__CGSCC_OPM-NEXT:    br label [[RETURN]]
-; IS__CGSCC_OPM:       return:
-; IS__CGSCC_OPM-NEXT:    [[RETVAL:%.*]] = phi i32 [ 0, [[CHK65:%.*]] ], [ 1, [[CHK0]] ], [ [[DOT]], [[BB_THEN]] ], [ 4, [[BB_IF]] ]
-; IS__CGSCC_OPM-NEXT:    ret i32 [[RETVAL]]
-;
-; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test-ashr
-; IS__CGSCC_NPM-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
-; IS__CGSCC_NPM-NEXT:  chk65:
-; IS__CGSCC_NPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[C]], 65
-; IS__CGSCC_NPM-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[CHK0:%.*]]
-; IS__CGSCC_NPM:       chk0:
-; IS__CGSCC_NPM-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[C]], 0
-; IS__CGSCC_NPM-NEXT:    br i1 [[CMP]], label [[RETURN]], label [[BB_IF:%.*]]
-; IS__CGSCC_NPM:       bb_if:
-; IS__CGSCC_NPM-NEXT:    [[ASHR_VAL:%.*]] = ashr exact i32 [[C]], 2
-; IS__CGSCC_NPM-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[ASHR_VAL]], 15
-; IS__CGSCC_NPM-NEXT:    br i1 [[CMP2]], label [[BB_THEN:%.*]], label [[RETURN]]
-; IS__CGSCC_NPM:       bb_then:
-; IS__CGSCC_NPM-NEXT:    [[DOT:%.*]] = select i1 true, i32 3, i32 2
-; IS__CGSCC_NPM-NEXT:    br label [[RETURN]]
-; IS__CGSCC_NPM:       return:
-; IS__CGSCC_NPM-NEXT:    [[RETVAL:%.*]] = phi i32 [ 0, [[CHK65:%.*]] ], [ 1, [[CHK0]] ], [ [[DOT]], [[BB_THEN]] ], [ 4, [[BB_IF]] ]
-; IS__CGSCC_NPM-NEXT:    ret i32 [[RETVAL]]
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test-ashr
+; IS__CGSCC____-SAME: (i32 [[C:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__CGSCC____-NEXT:  chk65:
+; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[C]], 65
+; IS__CGSCC____-NEXT:    br i1 [[CMP]], label [[RETURN:%.*]], label [[CHK0:%.*]]
+; IS__CGSCC____:       chk0:
+; IS__CGSCC____-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[C]], 0
+; IS__CGSCC____-NEXT:    br i1 [[CMP]], label [[RETURN]], label [[BB_IF:%.*]]
+; IS__CGSCC____:       bb_if:
+; IS__CGSCC____-NEXT:    [[ASHR_VAL:%.*]] = ashr exact i32 [[C]], 2
+; IS__CGSCC____-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[ASHR_VAL]], 15
+; IS__CGSCC____-NEXT:    br i1 [[CMP2]], label [[BB_THEN:%.*]], label [[RETURN]]
+; IS__CGSCC____:       bb_then:
+; IS__CGSCC____-NEXT:    [[CMP3:%.*]] = icmp eq i32 [[ASHR_VAL]], 16
+; IS__CGSCC____-NEXT:    [[DOT:%.*]] = select i1 [[CMP3]], i32 3, i32 2
+; IS__CGSCC____-NEXT:    br label [[RETURN]]
+; IS__CGSCC____:       return:
+; IS__CGSCC____-NEXT:    [[RETVAL:%.*]] = phi i32 [ 0, [[CHK65:%.*]] ], [ 1, [[CHK0]] ], [ [[DOT]], [[BB_THEN]] ], [ 4, [[BB_IF]] ]
+; IS__CGSCC____-NEXT:    ret i32 [[RETVAL]]
 ;
 chk65:
   %cmp = icmp sgt i32 %c, 65
@@ -112,7 +72,7 @@ return:
   ret i32 %retval
 }
 ;.
-; IS__TUNIT____: attributes #[[ATTR0:[0-9]+]] = { nofree nosync nounwind readnone willreturn }
+; IS__TUNIT____: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn }
 ;.
-; IS__CGSCC____: attributes #[[ATTR0:[0-9]+]] = { nofree norecurse nosync nounwind readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn }
 ;.

diff  --git a/llvm/test/Transforms/Attributor/memory_locations.ll b/llvm/test/Transforms/Attributor/memory_locations.ll
index 60cf10019c16..cacdf5cfc158 100644
--- a/llvm/test/Transforms/Attributor/memory_locations.ll
+++ b/llvm/test/Transforms/Attributor/memory_locations.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
@@ -631,7 +631,7 @@ define i8 @readnone_caller(i1 %c) {
 define internal i8 @recursive_not_readnone_internal2(i8* %ptr, i1 %c) {
 ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@recursive_not_readnone_internal2
-; IS__TUNIT____-SAME: (i8* nocapture nofree nonnull writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8]] {
+; IS__TUNIT____-SAME: (i8* noalias nocapture nofree nonnull writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8]] {
 ; IS__TUNIT____-NEXT:    [[ALLOC:%.*]] = alloca i8, align 1
 ; IS__TUNIT____-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__TUNIT____:       t:
@@ -644,7 +644,7 @@ define internal i8 @recursive_not_readnone_internal2(i8* %ptr, i1 %c) {
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree nosync nounwind
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@recursive_not_readnone_internal2
-; IS__CGSCC____-SAME: (i8* nocapture nofree nonnull writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8]] {
+; IS__CGSCC____-SAME: (i8* noalias nocapture nofree nonnull writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8]] {
 ; IS__CGSCC____-NEXT:    [[ALLOC:%.*]] = alloca i8, align 1
 ; IS__CGSCC____-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__CGSCC____:       t:

diff  --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll
index e28fd41c7eec..10e3218036e4 100644
--- a/llvm/test/Transforms/Attributor/noalias.ll
+++ b/llvm/test/Transforms/Attributor/noalias.ll
@@ -429,11 +429,13 @@ define void @test12_4(){
 ; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test12_4() {
 ; NOT_TUNIT_OPM-NEXT:    [[A:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
 ; NOT_TUNIT_OPM-NEXT:    [[B:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; NOT_TUNIT_OPM-NEXT:    [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0
 ; NOT_TUNIT_OPM-NEXT:    [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1
+; NOT_TUNIT_OPM-NEXT:    [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0
 ; NOT_TUNIT_OPM-NEXT:    tail call void @two_args(i8* noalias nocapture [[A]], i8* noalias nocapture [[B]])
-; NOT_TUNIT_OPM-NEXT:    tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A]])
+; NOT_TUNIT_OPM-NEXT:    tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]])
 ; NOT_TUNIT_OPM-NEXT:    tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_1]])
-; NOT_TUNIT_OPM-NEXT:    tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[B]])
+; NOT_TUNIT_OPM-NEXT:    tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]])
 ; NOT_TUNIT_OPM-NEXT:    ret void
 ;
   %A = tail call noalias i8* @malloc(i64 4)
@@ -465,17 +467,12 @@ define void @use_i8_internal(i8* %a) {
 }
 
 define void @test13_use_noalias(){
-; IS________OPM-LABEL: define {{[^@]+}}@test13_use_noalias() {
-; IS________OPM-NEXT:    [[M1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
-; IS________OPM-NEXT:    [[C1:%.*]] = bitcast i8* [[M1]] to i16*
-; IS________OPM-NEXT:    [[C2:%.*]] = bitcast i16* [[C1]] to i8*
-; IS________OPM-NEXT:    call void @use_i8_internal(i8* noalias nocapture [[C2]])
-; IS________OPM-NEXT:    ret void
-;
-; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test13_use_noalias() {
-; NOT_TUNIT_OPM-NEXT:    [[M1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
-; NOT_TUNIT_OPM-NEXT:    call void @use_i8_internal(i8* noalias nocapture [[M1]])
-; NOT_TUNIT_OPM-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@test13_use_noalias() {
+; CHECK-NEXT:    [[M1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    [[C1:%.*]] = bitcast i8* [[M1]] to i16*
+; CHECK-NEXT:    [[C2:%.*]] = bitcast i16* [[C1]] to i8*
+; CHECK-NEXT:    call void @use_i8_internal(i8* noalias nocapture [[C2]])
+; CHECK-NEXT:    ret void
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test13_use_noalias()
 ; IS__CGSCC_OPM-NEXT:    [[M1:%.*]] = tail call noalias i8* @malloc(i64 4)
@@ -491,20 +488,14 @@ define void @test13_use_noalias(){
 }
 
 define void @test13_use_alias(){
-; IS________OPM-LABEL: define {{[^@]+}}@test13_use_alias() {
-; IS________OPM-NEXT:    [[M1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
-; IS________OPM-NEXT:    [[C1:%.*]] = bitcast i8* [[M1]] to i16*
-; IS________OPM-NEXT:    [[C2A:%.*]] = bitcast i16* [[C1]] to i8*
-; IS________OPM-NEXT:    [[C2B:%.*]] = bitcast i16* [[C1]] to i8*
-; IS________OPM-NEXT:    call void @use_i8_internal(i8* nocapture [[C2A]])
-; IS________OPM-NEXT:    call void @use_i8_internal(i8* nocapture [[C2B]])
-; IS________OPM-NEXT:    ret void
-;
-; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test13_use_alias() {
-; NOT_TUNIT_OPM-NEXT:    [[M1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
-; NOT_TUNIT_OPM-NEXT:    call void @use_i8_internal(i8* nocapture [[M1]])
-; NOT_TUNIT_OPM-NEXT:    call void @use_i8_internal(i8* nocapture [[M1]])
-; NOT_TUNIT_OPM-NEXT:    ret void
+; CHECK-LABEL: define {{[^@]+}}@test13_use_alias() {
+; CHECK-NEXT:    [[M1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4)
+; CHECK-NEXT:    [[C1:%.*]] = bitcast i8* [[M1]] to i16*
+; CHECK-NEXT:    [[C2A:%.*]] = bitcast i16* [[C1]] to i8*
+; CHECK-NEXT:    [[C2B:%.*]] = bitcast i16* [[C1]] to i8*
+; CHECK-NEXT:    call void @use_i8_internal(i8* nocapture [[C2A]])
+; CHECK-NEXT:    call void @use_i8_internal(i8* nocapture [[C2B]])
+; CHECK-NEXT:    ret void
 ;
   %m1 = tail call noalias i8* @malloc(i64 4)
   %c1 = bitcast i8* %m1 to i16*

diff  --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll
index e27d37ccde6d..f2e4b4e0dce9 100644
--- a/llvm/test/Transforms/Attributor/nocapture-1.ll
+++ b/llvm/test/Transforms/Attributor/nocapture-1.ll
@@ -54,7 +54,7 @@ define void @c3(i32* %q) {
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@c3
 ; IS__CGSCC____-SAME: (i32* nofree writeonly [[Q:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    call void @c2(i32* nofree writeonly [[Q]]) #[[ATTR19:[0-9]+]]
+; IS__CGSCC____-NEXT:    call void @c2(i32* nofree writeonly [[Q]]) #[[ATTR17:[0-9]+]]
 ; IS__CGSCC____-NEXT:    ret void
 ;
   call void @c2(i32* %q)
@@ -221,14 +221,14 @@ define i1 @c7(i32* %q, i32 %bitno) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readonly willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@c7
 ; IS__TUNIT____-SAME: (i32* nofree readonly [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR2]] {
-; IS__TUNIT____-NEXT:    [[PTR:%.*]] = call i1* @lookup_bit(i32* noalias nofree readnone [[Q]], i32 [[BITNO]]) #[[ATTR17:[0-9]+]]
+; IS__TUNIT____-NEXT:    [[PTR:%.*]] = call i1* @lookup_bit(i32* noalias nofree readnone [[Q]], i32 [[BITNO]]) #[[ATTR15:[0-9]+]]
 ; IS__TUNIT____-NEXT:    [[VAL:%.*]] = load i1, i1* [[PTR]], align 1
 ; IS__TUNIT____-NEXT:    ret i1 [[VAL]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@c7
 ; IS__CGSCC____-SAME: (i32* nofree readonly [[Q:%.*]], i32 [[BITNO:%.*]]) #[[ATTR2]] {
-; IS__CGSCC____-NEXT:    [[PTR:%.*]] = call i1* @lookup_bit(i32* noalias nofree readnone [[Q]], i32 [[BITNO]]) #[[ATTR20:[0-9]+]]
+; IS__CGSCC____-NEXT:    [[PTR:%.*]] = call i1* @lookup_bit(i32* noalias nofree readnone [[Q]], i32 [[BITNO]]) #[[ATTR18:[0-9]+]]
 ; IS__CGSCC____-NEXT:    [[VAL:%.*]] = load i1, i1* [[PTR]], align 1
 ; IS__CGSCC____-NEXT:    ret i1 [[VAL]]
 ;
@@ -245,23 +245,27 @@ define i32 @nc1(i32* %q, i32* %p, i1 %b) {
 ; IS__TUNIT____-NEXT:  e:
 ; IS__TUNIT____-NEXT:    br label [[L:%.*]]
 ; IS__TUNIT____:       l:
-; IS__TUNIT____-NEXT:    [[Y:%.*]] = phi i32* [ [[Q]], [[E:%.*]] ]
-; IS__TUNIT____-NEXT:    [[TMP2:%.*]] = select i1 [[B]], i32* [[P]], i32* [[Y]]
+; IS__TUNIT____-NEXT:    [[X:%.*]] = phi i32* [ [[P]], [[E:%.*]] ]
+; IS__TUNIT____-NEXT:    [[Y:%.*]] = phi i32* [ [[Q]], [[E]] ]
+; IS__TUNIT____-NEXT:    [[TMP:%.*]] = bitcast i32* [[X]] to i32*
+; IS__TUNIT____-NEXT:    [[TMP2:%.*]] = select i1 [[B]], i32* [[TMP]], i32* [[Y]]
 ; IS__TUNIT____-NEXT:    [[VAL:%.*]] = load i32, i32* [[TMP2]], align 4
-; IS__TUNIT____-NEXT:    store i32 0, i32* [[P]], align 4
+; IS__TUNIT____-NEXT:    store i32 0, i32* [[TMP]], align 4
 ; IS__TUNIT____-NEXT:    store i32* [[Y]], i32** @g, align 8
 ; IS__TUNIT____-NEXT:    ret i32 [[VAL]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@nc1
-; IS__CGSCC____-SAME: (i32* nofree [[Q:%.*]], i32* nocapture nofree align 4 [[P:%.*]], i1 [[B:%.*]]) #[[ATTR5:[0-9]+]] {
+; IS__CGSCC____-SAME: (i32* nofree [[Q:%.*]], i32* nocapture nofree [[P:%.*]], i1 [[B:%.*]]) #[[ATTR5:[0-9]+]] {
 ; IS__CGSCC____-NEXT:  e:
 ; IS__CGSCC____-NEXT:    br label [[L:%.*]]
 ; IS__CGSCC____:       l:
-; IS__CGSCC____-NEXT:    [[Y:%.*]] = phi i32* [ [[Q]], [[E:%.*]] ]
-; IS__CGSCC____-NEXT:    [[TMP2:%.*]] = select i1 [[B]], i32* [[P]], i32* [[Y]]
+; IS__CGSCC____-NEXT:    [[X:%.*]] = phi i32* [ [[P]], [[E:%.*]] ]
+; IS__CGSCC____-NEXT:    [[Y:%.*]] = phi i32* [ [[Q]], [[E]] ]
+; IS__CGSCC____-NEXT:    [[TMP:%.*]] = bitcast i32* [[X]] to i32*
+; IS__CGSCC____-NEXT:    [[TMP2:%.*]] = select i1 [[B]], i32* [[TMP]], i32* [[Y]]
 ; IS__CGSCC____-NEXT:    [[VAL:%.*]] = load i32, i32* [[TMP2]], align 4
-; IS__CGSCC____-NEXT:    store i32 0, i32* [[P]], align 4
+; IS__CGSCC____-NEXT:    store i32 0, i32* [[TMP]], align 4
 ; IS__CGSCC____-NEXT:    store i32* [[Y]], i32** @g, align 8
 ; IS__CGSCC____-NEXT:    ret i32 [[VAL]]
 ;
@@ -331,8 +335,8 @@ define void @nc2(i32* %p, i32* %q) {
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@nc2
-; IS__CGSCC____-SAME: (i32* nocapture nofree align 4 [[P:%.*]], i32* nofree [[Q:%.*]]) #[[ATTR5]] {
-; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = call i32 @nc1(i32* nofree [[Q]], i32* nocapture nofree align 4 [[P]], i1 noundef false) #[[ATTR16:[0-9]+]]
+; IS__CGSCC____-SAME: (i32* nocapture nofree [[P:%.*]], i32* nofree [[Q:%.*]]) #[[ATTR5]] {
+; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = call i32 @nc1(i32* nofree [[Q]], i32* nocapture nofree [[P]], i1 noundef false) #[[ATTR14:[0-9]+]]
 ; IS__CGSCC____-NEXT:    ret void
 ;
   %1 = call i32 @nc1(i32* %q, i32* %p, i1 0)		; <i32> [#uses=0]
@@ -357,13 +361,13 @@ define void @nc4(i8* %p) {
 ; IS__TUNIT____: Function Attrs: argmemonly nounwind
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@nc4
 ; IS__TUNIT____-SAME: (i8* [[P:%.*]]) #[[ATTR6:[0-9]+]] {
-; IS__TUNIT____-NEXT:    call void @external(i8* readonly [[P]]) #[[ATTR18:[0-9]+]]
+; IS__TUNIT____-NEXT:    call void @external(i8* readonly [[P]]) #[[ATTR16:[0-9]+]]
 ; IS__TUNIT____-NEXT:    ret void
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nounwind
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@nc4
 ; IS__CGSCC____-SAME: (i8* [[P:%.*]]) #[[ATTR6:[0-9]+]] {
-; IS__CGSCC____-NEXT:    call void @external(i8* readonly [[P]]) #[[ATTR21:[0-9]+]]
+; IS__CGSCC____-NEXT:    call void @external(i8* readonly [[P]]) #[[ATTR19:[0-9]+]]
 ; IS__CGSCC____-NEXT:    ret void
 ;
   call void @external(i8* %p)
@@ -607,19 +611,19 @@ entry:
 }
 
 define void @nocaptureLaunder(i8* %p) {
-; IS__TUNIT____: Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@nocaptureLaunder
-; IS__TUNIT____-SAME: (i8* nocapture nofree [[P:%.*]]) #[[ATTR10:[0-9]+]] {
+; IS__TUNIT____-SAME: (i8* nocapture nofree [[P:%.*]]) #[[ATTR5]] {
 ; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR19:[0-9]+]]
+; IS__TUNIT____-NEXT:    [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR17:[0-9]+]]
 ; IS__TUNIT____-NEXT:    store i8 42, i8* [[B]], align 1
 ; IS__TUNIT____-NEXT:    ret void
 ;
-; IS__CGSCC____: Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn
+; IS__CGSCC____: Function Attrs: nofree nosync nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@nocaptureLaunder
 ; IS__CGSCC____-SAME: (i8* nocapture nofree [[P:%.*]]) #[[ATTR10:[0-9]+]] {
 ; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR22:[0-9]+]]
+; IS__CGSCC____-NEXT:    [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR20:[0-9]+]]
 ; IS__CGSCC____-NEXT:    store i8 42, i8* [[B]], align 1
 ; IS__CGSCC____-NEXT:    ret void
 ;
@@ -634,14 +638,14 @@ define void @captureLaunder(i8* %p) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@captureLaunder
 ; IS__TUNIT____-SAME: (i8* nofree [[P:%.*]]) #[[ATTR5]] {
-; IS__TUNIT____-NEXT:    [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR19]]
+; IS__TUNIT____-NEXT:    [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR17]]
 ; IS__TUNIT____-NEXT:    store i8* [[B]], i8** @g2, align 8
 ; IS__TUNIT____-NEXT:    ret void
 ;
 ; IS__CGSCC____: Function Attrs: nofree nosync nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@captureLaunder
-; IS__CGSCC____-SAME: (i8* nofree [[P:%.*]]) #[[ATTR11:[0-9]+]] {
-; IS__CGSCC____-NEXT:    [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR22]]
+; IS__CGSCC____-SAME: (i8* nofree [[P:%.*]]) #[[ATTR10]] {
+; IS__CGSCC____-NEXT:    [[B:%.*]] = call i8* @llvm.launder.invariant.group.p0i8(i8* nofree [[P]]) #[[ATTR20]]
 ; IS__CGSCC____-NEXT:    store i8* [[B]], i8** @g2, align 8
 ; IS__CGSCC____-NEXT:    ret void
 ;
@@ -651,19 +655,19 @@ define void @captureLaunder(i8* %p) {
 }
 
 define void @nocaptureStrip(i8* %p) {
-; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@nocaptureStrip
-; IS__TUNIT____-SAME: (i8* nocapture nofree writeonly [[P:%.*]]) #[[ATTR11:[0-9]+]] {
+; IS__TUNIT____-SAME: (i8* nocapture nofree writeonly [[P:%.*]]) #[[ATTR1]] {
 ; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR20:[0-9]+]]
+; IS__TUNIT____-NEXT:    [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR18:[0-9]+]]
 ; IS__TUNIT____-NEXT:    store i8 42, i8* [[B]], align 1
 ; IS__TUNIT____-NEXT:    ret void
 ;
-; IS__CGSCC____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
+; IS__CGSCC____: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@nocaptureStrip
-; IS__CGSCC____-SAME: (i8* nocapture nofree writeonly [[P:%.*]]) #[[ATTR12:[0-9]+]] {
+; IS__CGSCC____-SAME: (i8* nocapture nofree writeonly [[P:%.*]]) #[[ATTR11:[0-9]+]] {
 ; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR20]]
+; IS__CGSCC____-NEXT:    [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR18]]
 ; IS__CGSCC____-NEXT:    store i8 42, i8* [[B]], align 1
 ; IS__CGSCC____-NEXT:    ret void
 ;
@@ -678,14 +682,14 @@ define void @captureStrip(i8* %p) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@captureStrip
 ; IS__TUNIT____-SAME: (i8* nofree writeonly [[P:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR20]]
+; IS__TUNIT____-NEXT:    [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR18]]
 ; IS__TUNIT____-NEXT:    store i8* [[B]], i8** @g3, align 8
 ; IS__TUNIT____-NEXT:    ret void
 ;
 ; IS__CGSCC____: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@captureStrip
-; IS__CGSCC____-SAME: (i8* nofree writeonly [[P:%.*]]) #[[ATTR13:[0-9]+]] {
-; IS__CGSCC____-NEXT:    [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR20]]
+; IS__CGSCC____-SAME: (i8* nofree writeonly [[P:%.*]]) #[[ATTR11]] {
+; IS__CGSCC____-NEXT:    [[B:%.*]] = call i8* @llvm.strip.invariant.group.p0i8(i8* noalias nofree readnone [[P]]) #[[ATTR18]]
 ; IS__CGSCC____-NEXT:    store i8* [[B]], i8** @g3, align 8
 ; IS__CGSCC____-NEXT:    ret void
 ;
@@ -785,14 +789,14 @@ define i1 @nocaptureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x
 define i1 @captureDereferenceableOrNullICmp(i32* dereferenceable_or_null(4) %x) null_pointer_is_valid {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind null_pointer_is_valid readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@captureDereferenceableOrNullICmp
-; IS__TUNIT____-SAME: (i32* nofree readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR12:[0-9]+]] {
+; IS__TUNIT____-SAME: (i32* nofree readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR10:[0-9]+]] {
 ; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = bitcast i32* [[X]] to i8*
 ; IS__TUNIT____-NEXT:    [[TMP2:%.*]] = icmp eq i8* [[TMP1]], null
 ; IS__TUNIT____-NEXT:    ret i1 [[TMP2]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@captureDereferenceableOrNullICmp
-; IS__CGSCC____-SAME: (i32* nofree readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR14:[0-9]+]] {
+; IS__CGSCC____-SAME: (i32* nofree readnone dereferenceable_or_null(4) [[X:%.*]]) #[[ATTR12:[0-9]+]] {
 ; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = bitcast i32* [[X]] to i8*
 ; IS__CGSCC____-NEXT:    [[TMP2:%.*]] = icmp eq i8* [[TMP1]], null
 ; IS__CGSCC____-NEXT:    ret i1 [[TMP2]]
@@ -822,7 +826,7 @@ define i8* @test_returned1(i8* %A, i8* returned %B) nounwind readonly {
 ; CHECK-SAME: (i8* nocapture readonly [[A:%.*]], i8* readonly returned [[B:%.*]]) #[[ATTR4]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P:%.*]] = call i8* @unknownpi8pi8(i8* [[A]], i8* [[B]])
-; CHECK-NEXT:    ret i8* [[B]]
+; CHECK-NEXT:    ret i8* [[P]]
 ;
 entry:
   %p = call i8* @unknownpi8pi8(i8* %A, i8* %B)
@@ -835,7 +839,7 @@ define i8* @test_returned2(i8* %A, i8* %B) {
 ; CHECK-SAME: (i8* nocapture readonly [[A:%.*]], i8* readonly returned [[B:%.*]]) #[[ATTR4]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P:%.*]] = call i8* @unknownpi8pi8(i8* readonly [[A]], i8* readonly [[B]]) #[[ATTR4]]
-; CHECK-NEXT:    ret i8* [[B]]
+; CHECK-NEXT:    ret i8* [[P]]
 ;
 entry:
   %p = call i8* @unknownpi8pi8(i8* %A, i8* %B) nounwind readonly
@@ -850,13 +854,13 @@ declare void @val_use(i8 %ptr) readonly nounwind willreturn
 define void @ptr_uses(i8* %ptr, i8* %wptr) {
 ; IS__TUNIT____: Function Attrs: nounwind willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@ptr_uses
-; IS__TUNIT____-SAME: (i8* [[PTR:%.*]], i8* nocapture nofree noundef nonnull writeonly dereferenceable(1) [[WPTR:%.*]]) #[[ATTR14:[0-9]+]] {
+; IS__TUNIT____-SAME: (i8* [[PTR:%.*]], i8* nocapture nofree noundef nonnull writeonly dereferenceable(1) [[WPTR:%.*]]) #[[ATTR12:[0-9]+]] {
 ; IS__TUNIT____-NEXT:    store i8 0, i8* [[WPTR]], align 1
 ; IS__TUNIT____-NEXT:    ret void
 ;
 ; IS__CGSCC____: Function Attrs: nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@ptr_uses
-; IS__CGSCC____-SAME: (i8* [[PTR:%.*]], i8* nocapture nofree noundef nonnull writeonly dereferenceable(1) [[WPTR:%.*]]) #[[ATTR16]] {
+; IS__CGSCC____-SAME: (i8* [[PTR:%.*]], i8* nocapture nofree noundef nonnull writeonly dereferenceable(1) [[WPTR:%.*]]) #[[ATTR14]] {
 ; IS__CGSCC____-NEXT:    store i8 0, i8* [[WPTR]], align 1
 ; IS__CGSCC____-NEXT:    ret void
 ;
@@ -880,17 +884,15 @@ declare i8* @llvm.strip.invariant.group.p0i8(i8*)
 ; IS__TUNIT____: attributes #[[ATTR7]] = { nofree nosync nounwind writeonly }
 ; IS__TUNIT____: attributes #[[ATTR8]] = { nofree noreturn nosync nounwind readnone willreturn }
 ; IS__TUNIT____: attributes #[[ATTR9]] = { argmemonly nofree nounwind willreturn }
-; IS__TUNIT____: attributes #[[ATTR10]] = { inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn }
-; IS__TUNIT____: attributes #[[ATTR11]] = { argmemonly nofree nosync nounwind willreturn writeonly }
-; IS__TUNIT____: attributes #[[ATTR12]] = { nofree nosync nounwind null_pointer_is_valid readnone willreturn }
-; IS__TUNIT____: attributes #[[ATTR13:[0-9]+]] = { nounwind readonly willreturn }
-; IS__TUNIT____: attributes #[[ATTR14]] = { nounwind willreturn }
-; IS__TUNIT____: attributes #[[ATTR15:[0-9]+]] = { inaccessiblememonly nofree nosync nounwind speculatable willreturn }
-; IS__TUNIT____: attributes #[[ATTR16:[0-9]+]] = { nofree nosync nounwind readnone speculatable willreturn }
-; IS__TUNIT____: attributes #[[ATTR17]] = { nofree nounwind readnone willreturn }
-; IS__TUNIT____: attributes #[[ATTR18]] = { nounwind }
-; IS__TUNIT____: attributes #[[ATTR19]] = { willreturn }
-; IS__TUNIT____: attributes #[[ATTR20]] = { readnone willreturn }
+; IS__TUNIT____: attributes #[[ATTR10]] = { nofree nosync nounwind null_pointer_is_valid readnone willreturn }
+; IS__TUNIT____: attributes #[[ATTR11:[0-9]+]] = { nounwind readonly willreturn }
+; IS__TUNIT____: attributes #[[ATTR12]] = { nounwind willreturn }
+; IS__TUNIT____: attributes #[[ATTR13:[0-9]+]] = { inaccessiblememonly nofree nosync nounwind speculatable willreturn }
+; IS__TUNIT____: attributes #[[ATTR14:[0-9]+]] = { nofree nosync nounwind readnone speculatable willreturn }
+; IS__TUNIT____: attributes #[[ATTR15]] = { nofree nounwind readnone willreturn }
+; IS__TUNIT____: attributes #[[ATTR16]] = { nounwind }
+; IS__TUNIT____: attributes #[[ATTR17]] = { willreturn }
+; IS__TUNIT____: attributes #[[ATTR18]] = { readnone willreturn }
 ;.
 ; IS__CGSCC____: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn }
 ; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn writeonly }
@@ -902,17 +904,15 @@ declare i8* @llvm.strip.invariant.group.p0i8(i8*)
 ; IS__CGSCC____: attributes #[[ATTR7]] = { nofree nosync nounwind writeonly }
 ; IS__CGSCC____: attributes #[[ATTR8]] = { nofree norecurse noreturn nosync nounwind readnone willreturn }
 ; IS__CGSCC____: attributes #[[ATTR9]] = { argmemonly nofree norecurse nounwind willreturn }
-; IS__CGSCC____: attributes #[[ATTR10]] = { inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn }
-; IS__CGSCC____: attributes #[[ATTR11]] = { nofree nosync nounwind willreturn }
-; IS__CGSCC____: attributes #[[ATTR12]] = { argmemonly nofree nosync nounwind willreturn writeonly }
-; IS__CGSCC____: attributes #[[ATTR13]] = { nofree nosync nounwind willreturn writeonly }
-; IS__CGSCC____: attributes #[[ATTR14]] = { nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn }
-; IS__CGSCC____: attributes #[[ATTR15:[0-9]+]] = { nounwind readonly willreturn }
-; IS__CGSCC____: attributes #[[ATTR16]] = { nounwind willreturn }
-; IS__CGSCC____: attributes #[[ATTR17:[0-9]+]] = { inaccessiblememonly nofree nosync nounwind speculatable willreturn }
-; IS__CGSCC____: attributes #[[ATTR18:[0-9]+]] = { nofree nosync nounwind readnone speculatable willreturn }
-; IS__CGSCC____: attributes #[[ATTR19]] = { nounwind willreturn writeonly }
-; IS__CGSCC____: attributes #[[ATTR20]] = { readnone willreturn }
-; IS__CGSCC____: attributes #[[ATTR21]] = { nounwind }
-; IS__CGSCC____: attributes #[[ATTR22]] = { willreturn }
+; IS__CGSCC____: attributes #[[ATTR10]] = { nofree nosync nounwind willreturn }
+; IS__CGSCC____: attributes #[[ATTR11]] = { nofree nosync nounwind willreturn writeonly }
+; IS__CGSCC____: attributes #[[ATTR12]] = { nofree norecurse nosync nounwind null_pointer_is_valid readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR13:[0-9]+]] = { nounwind readonly willreturn }
+; IS__CGSCC____: attributes #[[ATTR14]] = { nounwind willreturn }
+; IS__CGSCC____: attributes #[[ATTR15:[0-9]+]] = { inaccessiblememonly nofree nosync nounwind speculatable willreturn }
+; IS__CGSCC____: attributes #[[ATTR16:[0-9]+]] = { nofree nosync nounwind readnone speculatable willreturn }
+; IS__CGSCC____: attributes #[[ATTR17]] = { nounwind willreturn writeonly }
+; IS__CGSCC____: attributes #[[ATTR18]] = { readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR19]] = { nounwind }
+; IS__CGSCC____: attributes #[[ATTR20]] = { willreturn }
 ;.

diff  --git a/llvm/test/Transforms/Attributor/nocapture-2.ll b/llvm/test/Transforms/Attributor/nocapture-2.ll
index 5257679c580a..1435e4e83441 100644
--- a/llvm/test/Transforms/Attributor/nocapture-2.ll
+++ b/llvm/test/Transforms/Attributor/nocapture-2.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=11 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 ;
@@ -219,9 +219,9 @@ define float* @scc_A(i32* dereferenceable_or_null(4) %a) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[A]] to i16*
 ; CHECK-NEXT:    [[CALL:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR2]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* [[CALL]] to double*
-; CHECK-NEXT:    [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR2]]
+; CHECK-NEXT:    [[CALL1:%.*]] = call dereferenceable_or_null(8) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR2]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i32*
-; CHECK-NEXT:    [[CALL2:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR2]]
+; CHECK-NEXT:    [[CALL2:%.*]] = call float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[CALL2]] to i32*
 ; CHECK-NEXT:    br label [[COND_END:%.*]]
 ; CHECK:       cond.false:
@@ -254,7 +254,6 @@ cond.end:                                         ; preds = %cond.false, %cond.t
   ret float* %4
 }
 
-; FIXME: the call1 below to scc_B should return dereferenceable_or_null(8) (as the callee does). Something prevented that deduction and needs to be investigated.
 define i64* @scc_B(double* dereferenceable_or_null(8) %a) {
 ; CHECK: Function Attrs: nofree nosync nounwind readnone
 ; CHECK-LABEL: define {{[^@]+}}@scc_B
@@ -266,9 +265,9 @@ define i64* @scc_B(double* dereferenceable_or_null(8) %a) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A]] to i32*
 ; CHECK-NEXT:    [[CALL:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR2]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[CALL]] to double*
-; CHECK-NEXT:    [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR2]]
+; CHECK-NEXT:    [[CALL1:%.*]] = call dereferenceable_or_null(8) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP1]]) #[[ATTR2]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[CALL1]] to i16*
-; CHECK-NEXT:    [[CALL2:%.*]] = call dereferenceable_or_null(4) i8* @scc_C(i16* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR2]]
+; CHECK-NEXT:    [[CALL2:%.*]] = call i8* @scc_C(i16* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR2]]
 ; CHECK-NEXT:    br label [[COND_END:%.*]]
 ; CHECK:       cond.false:
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[A]] to i8*
@@ -313,7 +312,7 @@ define i8* @scc_C(i16* dereferenceable_or_null(2) %a) {
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
 ; CHECK:       cond.true:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[A]] to double*
-; CHECK-NEXT:    [[CALL1:%.*]] = call dereferenceable_or_null(4) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR2]]
+; CHECK-NEXT:    [[CALL1:%.*]] = call dereferenceable_or_null(8) i64* @scc_B(double* noalias nofree readnone dereferenceable_or_null(8) "no-capture-maybe-returned" [[TMP0]]) #[[ATTR2]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[CALL1]] to i8*
 ; CHECK-NEXT:    br label [[COND_END:%.*]]
 ; CHECK:       cond.false:
@@ -322,7 +321,7 @@ define i8* @scc_C(i16* dereferenceable_or_null(2) %a) {
 ; CHECK:       cond.end:
 ; CHECK-NEXT:    [[COND:%.*]] = phi i8* [ [[TMP1]], [[COND_TRUE]] ], [ [[CALL2]], [[COND_FALSE]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[COND]] to i32*
-; CHECK-NEXT:    [[CALL3:%.*]] = call dereferenceable_or_null(4) float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR2]]
+; CHECK-NEXT:    [[CALL3:%.*]] = call float* @scc_A(i32* noalias nofree readnone dereferenceable_or_null(4) "no-capture-maybe-returned" [[TMP2]]) #[[ATTR2]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[CALL3]] to i8*
 ; CHECK-NEXT:    ret i8* [[TMP3]]
 ;
@@ -498,14 +497,14 @@ define i64* @negative_test_not_captured_but_returned_call_0a(i64* %a) #0 {
 ; IS__TUNIT____-SAME: (i64* nofree returned writeonly align 8 "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR4]] {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR9]]
-; IS__TUNIT____-NEXT:    ret i64* [[A]]
+; IS__TUNIT____-NEXT:    ret i64* [[CALL]]
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind uwtable willreturn writeonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@negative_test_not_captured_but_returned_call_0a
 ; IS__CGSCC____-SAME: (i64* nofree noundef nonnull returned writeonly align 8 dereferenceable(8) "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR4]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree noundef nonnull writeonly align 8 dereferenceable(8) "no-capture-maybe-returned" [[A]]) #[[ATTR9]]
-; IS__CGSCC____-NEXT:    ret i64* [[A]]
+; IS__CGSCC____-NEXT:    ret i64* [[CALL]]
 ;
 entry:
   %call = call i64* @not_captured_but_returned_0(i64* %a)
@@ -525,7 +524,7 @@ define void @negative_test_not_captured_but_returned_call_0b(i64* %a) #0 {
 ; IS__TUNIT____-SAME: (i64* nofree writeonly align 8 [[A:%.*]]) #[[ATTR4]] {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree writeonly align 8 "no-capture-maybe-returned" [[A]]) #[[ATTR9]]
-; IS__TUNIT____-NEXT:    [[TMP0:%.*]] = ptrtoint i64* [[A]] to i64
+; IS__TUNIT____-NEXT:    [[TMP0:%.*]] = ptrtoint i64* [[CALL]] to i64
 ; IS__TUNIT____-NEXT:    store i64 [[TMP0]], i64* [[A]], align 8
 ; IS__TUNIT____-NEXT:    ret void
 ;
@@ -534,7 +533,7 @@ define void @negative_test_not_captured_but_returned_call_0b(i64* %a) #0 {
 ; IS__CGSCC____-SAME: (i64* nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR4]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i64* @not_captured_but_returned_0(i64* nofree noundef nonnull writeonly align 8 dereferenceable(8) "no-capture-maybe-returned" [[A]]) #[[ATTR9]]
-; IS__CGSCC____-NEXT:    [[TMP0:%.*]] = ptrtoint i64* [[A]] to i64
+; IS__CGSCC____-NEXT:    [[TMP0:%.*]] = ptrtoint i64* [[CALL]] to i64
 ; IS__CGSCC____-NEXT:    store i64 [[TMP0]], i64* [[A]], align 8
 ; IS__CGSCC____-NEXT:    ret void
 ;
@@ -702,7 +701,7 @@ define i32* @not_captured_by_readonly_call_not_returned_either1(i32* %b, i32* re
 ; CHECK-SAME: (i32* nocapture readonly [[B:%.*]], i32* readonly returned [[R:%.*]]) #[[ATTR8:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32* @readonly_unknown(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR8]]
-; CHECK-NEXT:    ret i32* [[R]]
+; CHECK-NEXT:    ret i32* [[CALL]]
 ;
 entry:
   %call = call i32* @readonly_unknown(i32* %b, i32* %r) nounwind
@@ -716,7 +715,7 @@ define i32* @not_captured_by_readonly_call_not_returned_either2(i32* %b, i32* %r
 ; CHECK-SAME: (i32* nocapture readonly [[B:%.*]], i32* readonly returned [[R:%.*]]) #[[ATTR8]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR8]]
-; CHECK-NEXT:    ret i32* [[R]]
+; CHECK-NEXT:    ret i32* [[CALL]]
 ;
 entry:
   %call = call i32* @readonly_unknown_r1a(i32* %b, i32* %r) nounwind
@@ -730,7 +729,7 @@ define i32* @not_captured_by_readonly_call_not_returned_either3(i32* %b, i32* %r
 ; CHECK-SAME: (i32* nocapture readonly [[B:%.*]], i32* readonly returned [[R:%.*]]) #[[ATTR8]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32* @readonly_unknown_r1b(i32* nocapture readonly [[B]], i32* readonly [[R]]) #[[ATTR8]]
-; CHECK-NEXT:    ret i32* [[R]]
+; CHECK-NEXT:    ret i32* [[CALL]]
 ;
 entry:
   %call = call i32* @readonly_unknown_r1b(i32* %b, i32* %r)
@@ -743,7 +742,7 @@ define i32* @not_captured_by_readonly_call_not_returned_either4(i32* %b, i32* %r
 ; CHECK-SAME: (i32* nocapture readonly [[B:%.*]], i32* readonly returned [[R:%.*]]) #[[ATTR8]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32* @readonly_unknown_r1a(i32* readonly [[B]], i32* readonly [[R]]) #[[ATTR6]]
-; CHECK-NEXT:    ret i32* [[R]]
+; CHECK-NEXT:    ret i32* [[CALL]]
 ;
 entry:
   %call = call i32* @readonly_unknown_r1a(i32* %b, i32* %r)

diff  --git a/llvm/test/Transforms/Attributor/nodelete.ll b/llvm/test/Transforms/Attributor/nodelete.ll
index e3457357940d..a214084af5e0 100644
--- a/llvm/test/Transforms/Attributor/nodelete.ll
+++ b/llvm/test/Transforms/Attributor/nodelete.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 

diff  --git a/llvm/test/Transforms/Attributor/nonnull.ll b/llvm/test/Transforms/Attributor/nonnull.ll
index f1b0de6499b9..d57121f7bb39 100644
--- a/llvm/test/Transforms/Attributor/nonnull.ll
+++ b/llvm/test/Transforms/Attributor/nonnull.ll
@@ -36,6 +36,7 @@ define i8* @test2(i8* nonnull %p) {
 }
 
 define i8* @test2A(i1 %c, i8* %ret) {
+; ATTRIBUTOR: define nonnull i8* @test2A(i1 %c, i8* nofree nonnull readnone returned %ret)
 ; NOT_CGSCC_OPM: Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn
 ; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@test2A
 ; NOT_CGSCC_OPM-SAME: (i1 [[C:%.*]], i8* nofree nonnull readnone returned "no-capture-maybe-returned" [[RET:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -68,6 +69,7 @@ B:
 }
 
 define i8* @test2B(i1 %c, i8* %ret) {
+; ATTRIBUTOR: define nonnull dereferenceable(4) i8* @test2B(i1 %c, i8* nofree nonnull readnone returned dereferenceable(4) %ret)
 ; NOT_CGSCC_OPM: Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn
 ; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@test2B
 ; NOT_CGSCC_OPM-SAME: (i1 [[C:%.*]], i8* nofree nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[RET:%.*]]) #[[ATTR0]] {
@@ -144,30 +146,22 @@ define i8* @test3(i1 %c) {
 ; nonnull if neither can ever return null.  (In this case, they
 ; just never return period.)
 define i8* @test4_helper() {
-; NOT_CGSCC_NPM: Function Attrs: nofree noreturn nosync nounwind readnone willreturn
-; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test4_helper
-; NOT_CGSCC_NPM-SAME: () #[[ATTR2:[0-9]+]] {
-; NOT_CGSCC_NPM-NEXT:    unreachable
-;
-; IS__CGSCC_NPM: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test4_helper
-; IS__CGSCC_NPM-SAME: () #[[ATTR2:[0-9]+]] {
-; IS__CGSCC_NPM-NEXT:    unreachable
+; CHECK: Function Attrs: nofree noreturn nosync nounwind readnone
+; CHECK-LABEL: define {{[^@]+}}@test4_helper
+; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[RET:%.*]] = call i8* @test4() #[[ATTR2]]
+; CHECK-NEXT:    unreachable
 ;
   %ret = call i8* @test4()
   ret i8* %ret
 }
 
 define i8* @test4() {
-; NOT_CGSCC_NPM: Function Attrs: nofree noreturn nosync nounwind readnone willreturn
-; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@test4
-; NOT_CGSCC_NPM-SAME: () #[[ATTR2]] {
-; NOT_CGSCC_NPM-NEXT:    unreachable
-;
-; IS__CGSCC_NPM: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test4
-; IS__CGSCC_NPM-SAME: () #[[ATTR2]] {
-; IS__CGSCC_NPM-NEXT:    unreachable
+; CHECK: Function Attrs: nofree noreturn nosync nounwind readnone
+; CHECK-LABEL: define {{[^@]+}}@test4
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:    [[RET:%.*]] = call i8* @test4_helper() #[[ATTR2]]
+; CHECK-NEXT:    unreachable
 ;
   %ret = call i8* @test4_helper()
   ret i8* %ret
@@ -233,7 +227,6 @@ define i8* @test5(i1 %c) {
 
 ; Local analysis, but going through a self recursive phi
 define i8* @test6a() {
-;
 ; NOT_CGSCC_OPM: Function Attrs: noreturn
 ; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@test6a
 ; NOT_CGSCC_OPM-SAME: () #[[ATTR3:[0-9]+]] {
@@ -292,12 +285,14 @@ define i8* @test7(i8* %a) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@test7
 ; IS__TUNIT____-SAME: (i8* nofree readnone returned "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    ret i8* [[A]]
+; IS__TUNIT____-NEXT:    [[B:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 0
+; IS__TUNIT____-NEXT:    ret i8* [[B]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test7
 ; IS__CGSCC____-SAME: (i8* nofree readnone returned "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret i8* [[A]]
+; IS__CGSCC____-NEXT:    [[B:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 0
+; IS__CGSCC____-NEXT:    ret i8* [[B]]
 ;
   %b = getelementptr inbounds i8, i8* %a, i64 0
   ret i8* %b
@@ -427,7 +422,6 @@ define void @test13_helper() {
   ret void
 }
 define internal void @test13(i8* %a, i8* %b, i8* %c) {
-;
 ; NOT_CGSCC_OPM: Function Attrs: nounwind
 ; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@test13
 ; NOT_CGSCC_OPM-SAME: (i8* noalias nocapture nofree nonnull readnone [[A:%.*]], i8* noalias nocapture nofree readnone [[B:%.*]], i8* noalias nocapture nofree readnone [[C:%.*]]) #[[ATTR4]] {
@@ -641,7 +635,6 @@ if.else:
 ; fun1(nonnull %a)
 ; We can say that %a is nonnull
 define void @f17(i8* %a, i8 %c) {
-;
 ; NOT_CGSCC_OPM: Function Attrs: nounwind willreturn
 ; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@f17
 ; NOT_CGSCC_OPM-SAME: (i8* nonnull [[A:%.*]], i8 [[C:%.*]]) #[[ATTR6]] {
@@ -1113,7 +1106,6 @@ define internal void @called_by_weak(i32* %a) {
 
 ; Check we do not annotate the function interface of this weak function.
 define weak_odr void @weak_caller(i32* nonnull %a) {
-;
 ; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@weak_caller
 ; NOT_CGSCC_OPM-SAME: (i32* nonnull [[A:%.*]]) {
 ; NOT_CGSCC_OPM-NEXT:    call void @called_by_weak(i32* noalias nocapture nonnull readnone [[A]]) #[[ATTR4]]
@@ -1164,7 +1156,6 @@ define internal void @naked(i32* dereferenceable(4) %a) naked {
 }
 ; Avoid nonnull as we do not touch optnone
 define internal void @optnone(i32* dereferenceable(4) %a) optnone noinline {
-;
 ; NOT_CGSCC_OPM: Function Attrs: noinline optnone
 ; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@optnone
 ; NOT_CGSCC_OPM-SAME: (i32* dereferenceable(4) [[A:%.*]]) #[[ATTR10:[0-9]+]] {
@@ -1639,7 +1630,6 @@ define void @nonnull_assume_neg(i8* %arg) {
 ; ATTRIBUTOR-NEXT:    call void @use_i8_ptr_ret(i8* noalias nocapture nofree nonnull readnone [[ARG]])
 ; ATTRIBUTOR-NEXT:    ret void
 ;
-;
 ; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@nonnull_assume_neg
 ; NOT_CGSCC_OPM-SAME: (i8* nocapture nofree readnone [[ARG:%.*]]) {
 ; NOT_CGSCC_OPM-NEXT:    [[TMP1:%.*]] = call i8* @unknown()
@@ -1726,7 +1716,7 @@ attributes #1 = { nounwind willreturn}
 ;.
 ; IS__TUNIT____: attributes #[[ATTR0]] = { inaccessiblememonly nofree nosync nounwind willreturn }
 ; IS__TUNIT____: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
-; IS__TUNIT____: attributes #[[ATTR2]] = { nofree noreturn nosync nounwind readnone willreturn }
+; IS__TUNIT____: attributes #[[ATTR2]] = { nofree noreturn nosync nounwind readnone }
 ; IS__TUNIT____: attributes #[[ATTR3]] = { noreturn }
 ; IS__TUNIT____: attributes #[[ATTR4]] = { nounwind }
 ; IS__TUNIT____: attributes #[[ATTR5]] = { argmemonly nofree nosync nounwind readonly }
@@ -1742,7 +1732,7 @@ attributes #1 = { nounwind willreturn}
 ;.
 ; IS__CGSCC_OPM: attributes #[[ATTR0]] = { inaccessiblememonly nofree nosync nounwind willreturn }
 ; IS__CGSCC_OPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn }
-; IS__CGSCC_OPM: attributes #[[ATTR2]] = { nofree noreturn nosync nounwind readnone willreturn }
+; IS__CGSCC_OPM: attributes #[[ATTR2]] = { nofree noreturn nosync nounwind readnone }
 ; IS__CGSCC_OPM: attributes #[[ATTR3]] = { nofree nosync nounwind readnone willreturn }
 ; IS__CGSCC_OPM: attributes #[[ATTR4]] = { noreturn }
 ; IS__CGSCC_OPM: attributes #[[ATTR5]] = { nounwind }
@@ -1759,7 +1749,7 @@ attributes #1 = { nounwind willreturn}
 ;.
 ; IS__CGSCC_NPM: attributes #[[ATTR0]] = { inaccessiblememonly nofree nosync nounwind willreturn }
 ; IS__CGSCC_NPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn }
-; IS__CGSCC_NPM: attributes #[[ATTR2]] = { nofree norecurse noreturn nosync nounwind readnone willreturn }
+; IS__CGSCC_NPM: attributes #[[ATTR2]] = { nofree noreturn nosync nounwind readnone }
 ; IS__CGSCC_NPM: attributes #[[ATTR3]] = { noreturn }
 ; IS__CGSCC_NPM: attributes #[[ATTR4]] = { nounwind }
 ; IS__CGSCC_NPM: attributes #[[ATTR5]] = { argmemonly nofree nosync nounwind readonly }

diff  --git a/llvm/test/Transforms/Attributor/norecurse.ll b/llvm/test/Transforms/Attributor/norecurse.ll
index 316ed29e2ca9..c053c2f1578b 100644
--- a/llvm/test/Transforms/Attributor/norecurse.ll
+++ b/llvm/test/Transforms/Attributor/norecurse.ll
@@ -34,56 +34,32 @@ define i32 @self_rec() {
 }
 
 define i32 @indirect_rec() {
-; IS__TUNIT____: Function Attrs: nofree noreturn nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@indirect_rec
-; IS__TUNIT____-SAME: () #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    unreachable
-;
-; IS__CGSCC_OPM: Function Attrs: nofree noreturn nosync nounwind readnone willreturn
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@indirect_rec
-; IS__CGSCC_OPM-SAME: () #[[ATTR2:[0-9]+]] {
-; IS__CGSCC_OPM-NEXT:    unreachable
-;
-; IS__CGSCC_NPM: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@indirect_rec
-; IS__CGSCC_NPM-SAME: () #[[ATTR1]] {
-; IS__CGSCC_NPM-NEXT:    unreachable
+; CHECK: Function Attrs: nofree noreturn nosync nounwind readnone
+; CHECK-LABEL: define {{[^@]+}}@indirect_rec
+; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    [[A:%.*]] = call i32 @indirect_rec2() #[[ATTR2]]
+; CHECK-NEXT:    unreachable
 ;
   %a = call i32 @indirect_rec2()
   ret i32 %a
 }
 define i32 @indirect_rec2() {
-; IS__TUNIT____: Function Attrs: nofree noreturn nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@indirect_rec2
-; IS__TUNIT____-SAME: () #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    unreachable
-;
-; IS__CGSCC_OPM: Function Attrs: nofree noreturn nosync nounwind readnone willreturn
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@indirect_rec2
-; IS__CGSCC_OPM-SAME: () #[[ATTR2]] {
-; IS__CGSCC_OPM-NEXT:    unreachable
-;
-; IS__CGSCC_NPM: Function Attrs: nofree norecurse noreturn nosync nounwind readnone willreturn
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@indirect_rec2
-; IS__CGSCC_NPM-SAME: () #[[ATTR1]] {
-; IS__CGSCC_NPM-NEXT:    unreachable
+; CHECK: Function Attrs: nofree noreturn nosync nounwind readnone
+; CHECK-LABEL: define {{[^@]+}}@indirect_rec2
+; CHECK-SAME: () #[[ATTR2]] {
+; CHECK-NEXT:    [[A:%.*]] = call i32 @indirect_rec() #[[ATTR2]]
+; CHECK-NEXT:    unreachable
 ;
   %a = call i32 @indirect_rec()
   ret i32 %a
 }
 
 define i32 @extern() {
-; NOT_CGSCC_OPM: Function Attrs: nosync readnone
-; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@extern
-; NOT_CGSCC_OPM-SAME: () #[[ATTR2:[0-9]+]] {
-; NOT_CGSCC_OPM-NEXT:    [[A:%.*]] = call i32 @k()
-; NOT_CGSCC_OPM-NEXT:    ret i32 [[A]]
-;
-; IS__CGSCC_OPM: Function Attrs: nosync readnone
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@extern
-; IS__CGSCC_OPM-SAME: () #[[ATTR3:[0-9]+]] {
-; IS__CGSCC_OPM-NEXT:    [[A:%.*]] = call i32 @k()
-; IS__CGSCC_OPM-NEXT:    ret i32 [[A]]
+; CHECK: Function Attrs: nosync readnone
+; CHECK-LABEL: define {{[^@]+}}@extern
+; CHECK-SAME: () #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    [[A:%.*]] = call i32 @k()
+; CHECK-NEXT:    ret i32 [[A]]
 ;
   %a = call i32 @k()
   ret i32 %a
@@ -94,17 +70,11 @@ define i32 @extern() {
 declare i32 @k() readnone
 
 define void @intrinsic(i8* %dest, i8* %src, i32 %len) {
-; NOT_CGSCC_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn
-; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@intrinsic
-; NOT_CGSCC_OPM-SAME: (i8* nocapture nofree writeonly [[DEST:%.*]], i8* nocapture nofree readonly [[SRC:%.*]], i32 [[LEN:%.*]]) #[[ATTR4:[0-9]+]] {
-; NOT_CGSCC_OPM-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree writeonly [[DEST]], i8* noalias nocapture nofree readonly [[SRC]], i32 [[LEN]], i1 noundef false) #[[ATTR10:[0-9]+]]
-; NOT_CGSCC_OPM-NEXT:    ret void
-;
-; IS__CGSCC_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@intrinsic
-; IS__CGSCC_OPM-SAME: (i8* nocapture nofree writeonly [[DEST:%.*]], i8* nocapture nofree readonly [[SRC:%.*]], i32 [[LEN:%.*]]) #[[ATTR5:[0-9]+]] {
-; IS__CGSCC_OPM-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree writeonly [[DEST]], i8* noalias nocapture nofree readonly [[SRC]], i32 [[LEN]], i1 noundef false) #[[ATTR11:[0-9]+]]
-; IS__CGSCC_OPM-NEXT:    ret void
+; CHECK: Function Attrs: argmemonly nofree nosync nounwind willreturn
+; CHECK-LABEL: define {{[^@]+}}@intrinsic
+; CHECK-SAME: (i8* nocapture nofree writeonly [[DEST:%.*]], i8* nocapture nofree readonly [[SRC:%.*]], i32 [[LEN:%.*]]) #[[ATTR5:[0-9]+]] {
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i32(i8* noalias nocapture nofree writeonly [[DEST]], i8* noalias nocapture nofree readonly [[SRC]], i32 [[LEN]], i1 noundef false) #[[ATTR11:[0-9]+]]
+; CHECK-NEXT:    ret void
 ;
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 false)
   ret void
@@ -117,21 +87,15 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i1)
 define internal i32 @called_by_norecurse() {
 ; IS__TUNIT____: Function Attrs: nosync readnone
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@called_by_norecurse
-; IS__TUNIT____-SAME: () #[[ATTR2]] {
+; IS__TUNIT____-SAME: () #[[ATTR3]] {
 ; IS__TUNIT____-NEXT:    [[A:%.*]] = call i32 @k()
 ; IS__TUNIT____-NEXT:    ret i32 undef
 ;
-; IS__CGSCC_OPM: Function Attrs: norecurse nosync readnone
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@called_by_norecurse
-; IS__CGSCC_OPM-SAME: () #[[ATTR7:[0-9]+]] {
-; IS__CGSCC_OPM-NEXT:    [[A:%.*]] = call i32 @k()
-; IS__CGSCC_OPM-NEXT:    ret i32 undef
-;
-; IS__CGSCC_NPM: Function Attrs: norecurse nosync readnone
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@called_by_norecurse
-; IS__CGSCC_NPM-SAME: () #[[ATTR6:[0-9]+]] {
-; IS__CGSCC_NPM-NEXT:    [[A:%.*]] = call i32 @k()
-; IS__CGSCC_NPM-NEXT:    ret i32 undef
+; IS__CGSCC____: Function Attrs: norecurse nosync readnone
+; IS__CGSCC____-LABEL: define {{[^@]+}}@called_by_norecurse
+; IS__CGSCC____-SAME: () #[[ATTR7:[0-9]+]] {
+; IS__CGSCC____-NEXT:    [[A:%.*]] = call i32 @k()
+; IS__CGSCC____-NEXT:    ret i32 undef
 ;
   %a = call i32 @k()
   ret i32 %a
@@ -139,38 +103,26 @@ define internal i32 @called_by_norecurse() {
 define void @m() norecurse {
 ; IS__TUNIT____: Function Attrs: norecurse nosync readnone
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@m
-; IS__TUNIT____-SAME: () #[[ATTR6:[0-9]+]] {
-; IS__TUNIT____-NEXT:    [[A:%.*]] = call i32 @called_by_norecurse() #[[ATTR2]]
+; IS__TUNIT____-SAME: () #[[ATTR7:[0-9]+]] {
+; IS__TUNIT____-NEXT:    [[A:%.*]] = call i32 @called_by_norecurse() #[[ATTR3]]
 ; IS__TUNIT____-NEXT:    ret void
 ;
-; IS__CGSCC_OPM: Function Attrs: norecurse nosync readnone
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@m
-; IS__CGSCC_OPM-SAME: () #[[ATTR7]] {
-; IS__CGSCC_OPM-NEXT:    [[A:%.*]] = call i32 @called_by_norecurse()
-; IS__CGSCC_OPM-NEXT:    ret void
-;
-; IS__CGSCC_NPM: Function Attrs: norecurse nosync readnone
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@m
-; IS__CGSCC_NPM-SAME: () #[[ATTR6]] {
-; IS__CGSCC_NPM-NEXT:    [[A:%.*]] = call i32 @called_by_norecurse()
-; IS__CGSCC_NPM-NEXT:    ret void
+; IS__CGSCC____: Function Attrs: norecurse nosync readnone
+; IS__CGSCC____-LABEL: define {{[^@]+}}@m
+; IS__CGSCC____-SAME: () #[[ATTR7]] {
+; IS__CGSCC____-NEXT:    [[A:%.*]] = call i32 @called_by_norecurse()
+; IS__CGSCC____-NEXT:    ret void
 ;
   %a = call i32 @called_by_norecurse()
   ret void
 }
 
 define internal i32 @called_by_norecurse_indirectly() {
-; NOT_CGSCC_OPM: Function Attrs: nosync readnone
-; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@called_by_norecurse_indirectly
-; NOT_CGSCC_OPM-SAME: () #[[ATTR2]] {
-; NOT_CGSCC_OPM-NEXT:    [[A:%.*]] = call i32 @k()
-; NOT_CGSCC_OPM-NEXT:    ret i32 [[A]]
-;
-; IS__CGSCC_OPM: Function Attrs: nosync readnone
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@called_by_norecurse_indirectly
-; IS__CGSCC_OPM-SAME: () #[[ATTR3]] {
-; IS__CGSCC_OPM-NEXT:    [[A:%.*]] = call i32 @k()
-; IS__CGSCC_OPM-NEXT:    ret i32 [[A]]
+; CHECK: Function Attrs: nosync readnone
+; CHECK-LABEL: define {{[^@]+}}@called_by_norecurse_indirectly
+; CHECK-SAME: () #[[ATTR3]] {
+; CHECK-NEXT:    [[A:%.*]] = call i32 @k()
+; CHECK-NEXT:    ret i32 [[A]]
 ;
   %a = call i32 @k()
   ret i32 %a
@@ -178,21 +130,15 @@ define internal i32 @called_by_norecurse_indirectly() {
 define internal i32 @o() {
 ; IS__TUNIT____: Function Attrs: nosync readnone
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@o
-; IS__TUNIT____-SAME: () #[[ATTR2]] {
-; IS__TUNIT____-NEXT:    [[A:%.*]] = call i32 @called_by_norecurse_indirectly() #[[ATTR2]]
+; IS__TUNIT____-SAME: () #[[ATTR3]] {
+; IS__TUNIT____-NEXT:    [[A:%.*]] = call i32 @called_by_norecurse_indirectly() #[[ATTR3]]
 ; IS__TUNIT____-NEXT:    ret i32 [[A]]
 ;
-; IS__CGSCC_OPM: Function Attrs: norecurse nosync readnone
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@o
-; IS__CGSCC_OPM-SAME: () #[[ATTR7]] {
-; IS__CGSCC_OPM-NEXT:    [[A:%.*]] = call i32 @called_by_norecurse_indirectly()
-; IS__CGSCC_OPM-NEXT:    ret i32 [[A]]
-;
-; IS__CGSCC_NPM: Function Attrs: norecurse nosync readnone
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@o
-; IS__CGSCC_NPM-SAME: () #[[ATTR6]] {
-; IS__CGSCC_NPM-NEXT:    [[A:%.*]] = call i32 @called_by_norecurse_indirectly()
-; IS__CGSCC_NPM-NEXT:    ret i32 [[A]]
+; IS__CGSCC____: Function Attrs: norecurse nosync readnone
+; IS__CGSCC____-LABEL: define {{[^@]+}}@o
+; IS__CGSCC____-SAME: () #[[ATTR7]] {
+; IS__CGSCC____-NEXT:    [[A:%.*]] = call i32 @called_by_norecurse_indirectly()
+; IS__CGSCC____-NEXT:    ret i32 [[A]]
 ;
   %a = call i32 @called_by_norecurse_indirectly()
   ret i32 %a
@@ -200,56 +146,35 @@ define internal i32 @o() {
 define i32 @p() norecurse {
 ; IS__TUNIT____: Function Attrs: norecurse nosync readnone
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@p
-; IS__TUNIT____-SAME: () #[[ATTR6]] {
-; IS__TUNIT____-NEXT:    [[A:%.*]] = call i32 @o() #[[ATTR2]]
+; IS__TUNIT____-SAME: () #[[ATTR7]] {
+; IS__TUNIT____-NEXT:    [[A:%.*]] = call i32 @o() #[[ATTR3]]
 ; IS__TUNIT____-NEXT:    ret i32 [[A]]
 ;
-; IS__CGSCC_OPM: Function Attrs: norecurse nosync readnone
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@p
-; IS__CGSCC_OPM-SAME: () #[[ATTR7]] {
-; IS__CGSCC_OPM-NEXT:    [[A:%.*]] = call i32 @o()
-; IS__CGSCC_OPM-NEXT:    ret i32 [[A]]
-;
-; IS__CGSCC_NPM: Function Attrs: norecurse nosync readnone
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@p
-; IS__CGSCC_NPM-SAME: () #[[ATTR6]] {
-; IS__CGSCC_NPM-NEXT:    [[A:%.*]] = call i32 @o()
-; IS__CGSCC_NPM-NEXT:    ret i32 [[A]]
+; IS__CGSCC____: Function Attrs: norecurse nosync readnone
+; IS__CGSCC____-LABEL: define {{[^@]+}}@p
+; IS__CGSCC____-SAME: () #[[ATTR7]] {
+; IS__CGSCC____-NEXT:    [[A:%.*]] = call i32 @o()
+; IS__CGSCC____-NEXT:    ret i32 [[A]]
 ;
   %a = call i32 @o()
   ret i32 %a
 }
 
 define void @f(i32 %x)  {
-; NOT_CGSCC_OPM: Function Attrs: nofree nosync nounwind readnone
-; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@f
-; NOT_CGSCC_OPM-SAME: (i32 [[X:%.*]]) #[[ATTR7:[0-9]+]] {
-; NOT_CGSCC_OPM-NEXT:  entry:
-; NOT_CGSCC_OPM-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
-; NOT_CGSCC_OPM-NEXT:    store i32 [[X]], i32* [[X_ADDR]], align 4
-; NOT_CGSCC_OPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 4
-; NOT_CGSCC_OPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-; NOT_CGSCC_OPM-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; NOT_CGSCC_OPM:       if.then:
-; NOT_CGSCC_OPM-NEXT:    call void @g() #[[ATTR8:[0-9]+]]
-; NOT_CGSCC_OPM-NEXT:    br label [[IF_END]]
-; NOT_CGSCC_OPM:       if.end:
-; NOT_CGSCC_OPM-NEXT:    ret void
-;
-; IS__CGSCC_OPM: Function Attrs: nofree nosync nounwind readnone
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f
-; IS__CGSCC_OPM-SAME: (i32 [[X:%.*]]) #[[ATTR8:[0-9]+]] {
-; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
-; IS__CGSCC_OPM-NEXT:    store i32 [[X]], i32* [[X_ADDR]], align 4
-; IS__CGSCC_OPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 4
-; IS__CGSCC_OPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
-; IS__CGSCC_OPM-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; IS__CGSCC_OPM:       if.then:
-; IS__CGSCC_OPM-NEXT:    call void @g() #[[ATTR9:[0-9]+]]
-; IS__CGSCC_OPM-NEXT:    br label [[IF_END]]
-; IS__CGSCC_OPM:       if.end:
-; IS__CGSCC_OPM-NEXT:    ret void
+; CHECK: Function Attrs: nofree nosync nounwind readnone
+; CHECK-LABEL: define {{[^@]+}}@f
+; CHECK-SAME: (i32 [[X:%.*]]) #[[ATTR8:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[X]], i32* [[X_ADDR]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 4
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    call void @g() #[[ATTR9:[0-9]+]]
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
 ;
 entry:
   %x.addr = alloca i32, align 4
@@ -267,19 +192,12 @@ if.end:
 }
 
 define void @g() norecurse {
-; NOT_CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone
-; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@g
-; NOT_CGSCC_OPM-SAME: () #[[ATTR8]] {
-; NOT_CGSCC_OPM-NEXT:  entry:
-; NOT_CGSCC_OPM-NEXT:    call void @f(i32 noundef 0) #[[ATTR7]]
-; NOT_CGSCC_OPM-NEXT:    ret void
-;
-; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@g
-; IS__CGSCC_OPM-SAME: () #[[ATTR9]] {
-; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    call void @f(i32 noundef 0) #[[ATTR8]]
-; IS__CGSCC_OPM-NEXT:    ret void
+; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone
+; CHECK-LABEL: define {{[^@]+}}@g
+; CHECK-SAME: () #[[ATTR9]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @f(i32 noundef 0) #[[ATTR8]]
+; CHECK-NEXT:    ret void
 ;
 entry:
   call void @f(i32 0)
@@ -305,17 +223,11 @@ define i32 @eval_func1(i32 (i32)* , i32) local_unnamed_addr {
 }
 
 define i32 @eval_func2(i32 (i32)* , i32) local_unnamed_addr null_pointer_is_valid{
-; NOT_CGSCC_OPM: Function Attrs: null_pointer_is_valid
-; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@eval_func2
-; NOT_CGSCC_OPM-SAME: (i32 (i32)* nocapture nofree noundef [[TMP0:%.*]], i32 [[TMP1:%.*]]) local_unnamed_addr #[[ATTR9:[0-9]+]] {
-; NOT_CGSCC_OPM-NEXT:    [[TMP3:%.*]] = tail call i32 [[TMP0]](i32 [[TMP1]])
-; NOT_CGSCC_OPM-NEXT:    ret i32 [[TMP3]]
-;
-; IS__CGSCC_OPM: Function Attrs: null_pointer_is_valid
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@eval_func2
-; IS__CGSCC_OPM-SAME: (i32 (i32)* nocapture nofree noundef [[TMP0:%.*]], i32 [[TMP1:%.*]]) local_unnamed_addr #[[ATTR10:[0-9]+]] {
-; IS__CGSCC_OPM-NEXT:    [[TMP3:%.*]] = tail call i32 [[TMP0]](i32 [[TMP1]])
-; IS__CGSCC_OPM-NEXT:    ret i32 [[TMP3]]
+; CHECK: Function Attrs: null_pointer_is_valid
+; CHECK-LABEL: define {{[^@]+}}@eval_func2
+; CHECK-SAME: (i32 (i32)* nocapture nofree noundef [[TMP0:%.*]], i32 [[TMP1:%.*]]) local_unnamed_addr #[[ATTR10:[0-9]+]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 [[TMP0]](i32 [[TMP1]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %3 = tail call i32 %0(i32 %1) #2
   ret i32 %3
@@ -347,38 +259,27 @@ Dead:
 ;.
 ; IS__TUNIT____: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn }
 ; IS__TUNIT____: attributes #[[ATTR1]] = { nofree noreturn nosync nounwind readnone willreturn }
-; IS__TUNIT____: attributes #[[ATTR2]] = { nosync readnone }
-; IS__TUNIT____: attributes #[[ATTR3:[0-9]+]] = { readnone }
-; IS__TUNIT____: attributes #[[ATTR4]] = { argmemonly nofree nosync nounwind willreturn }
-; IS__TUNIT____: attributes #[[ATTR5:[0-9]+]] = { argmemonly nofree nounwind willreturn }
-; IS__TUNIT____: attributes #[[ATTR6]] = { norecurse nosync readnone }
-; IS__TUNIT____: attributes #[[ATTR7]] = { nofree nosync nounwind readnone }
-; IS__TUNIT____: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind readnone }
-; IS__TUNIT____: attributes #[[ATTR9]] = { null_pointer_is_valid }
-; IS__TUNIT____: attributes #[[ATTR10]] = { willreturn }
-;.
-; IS__CGSCC_OPM: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn }
-; IS__CGSCC_OPM: attributes #[[ATTR1]] = { nofree norecurse noreturn nosync nounwind readnone willreturn }
-; IS__CGSCC_OPM: attributes #[[ATTR2]] = { nofree noreturn nosync nounwind readnone willreturn }
-; IS__CGSCC_OPM: attributes #[[ATTR3]] = { nosync readnone }
-; IS__CGSCC_OPM: attributes #[[ATTR4:[0-9]+]] = { readnone }
-; IS__CGSCC_OPM: attributes #[[ATTR5]] = { argmemonly nofree nosync nounwind willreturn }
-; IS__CGSCC_OPM: attributes #[[ATTR6:[0-9]+]] = { argmemonly nofree nounwind willreturn }
-; IS__CGSCC_OPM: attributes #[[ATTR7]] = { norecurse nosync readnone }
-; IS__CGSCC_OPM: attributes #[[ATTR8]] = { nofree nosync nounwind readnone }
-; IS__CGSCC_OPM: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind readnone }
-; IS__CGSCC_OPM: attributes #[[ATTR10]] = { null_pointer_is_valid }
-; IS__CGSCC_OPM: attributes #[[ATTR11]] = { willreturn }
+; IS__TUNIT____: attributes #[[ATTR2]] = { nofree noreturn nosync nounwind readnone }
+; IS__TUNIT____: attributes #[[ATTR3]] = { nosync readnone }
+; IS__TUNIT____: attributes #[[ATTR4:[0-9]+]] = { readnone }
+; IS__TUNIT____: attributes #[[ATTR5]] = { argmemonly nofree nosync nounwind willreturn }
+; IS__TUNIT____: attributes #[[ATTR6:[0-9]+]] = { argmemonly nofree nounwind willreturn }
+; IS__TUNIT____: attributes #[[ATTR7]] = { norecurse nosync readnone }
+; IS__TUNIT____: attributes #[[ATTR8]] = { nofree nosync nounwind readnone }
+; IS__TUNIT____: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind readnone }
+; IS__TUNIT____: attributes #[[ATTR10]] = { null_pointer_is_valid }
+; IS__TUNIT____: attributes #[[ATTR11]] = { willreturn }
 ;.
-; IS__CGSCC_NPM: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn }
-; IS__CGSCC_NPM: attributes #[[ATTR1]] = { nofree norecurse noreturn nosync nounwind readnone willreturn }
-; IS__CGSCC_NPM: attributes #[[ATTR2]] = { nosync readnone }
-; IS__CGSCC_NPM: attributes #[[ATTR3:[0-9]+]] = { readnone }
-; IS__CGSCC_NPM: attributes #[[ATTR4]] = { argmemonly nofree nosync nounwind willreturn }
-; IS__CGSCC_NPM: attributes #[[ATTR5:[0-9]+]] = { argmemonly nofree nounwind willreturn }
-; IS__CGSCC_NPM: attributes #[[ATTR6]] = { norecurse nosync readnone }
-; IS__CGSCC_NPM: attributes #[[ATTR7]] = { nofree nosync nounwind readnone }
-; IS__CGSCC_NPM: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind readnone }
-; IS__CGSCC_NPM: attributes #[[ATTR9]] = { null_pointer_is_valid }
-; IS__CGSCC_NPM: attributes #[[ATTR10]] = { willreturn }
+; IS__CGSCC____: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse noreturn nosync nounwind readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR2]] = { nofree noreturn nosync nounwind readnone }
+; IS__CGSCC____: attributes #[[ATTR3]] = { nosync readnone }
+; IS__CGSCC____: attributes #[[ATTR4:[0-9]+]] = { readnone }
+; IS__CGSCC____: attributes #[[ATTR5]] = { argmemonly nofree nosync nounwind willreturn }
+; IS__CGSCC____: attributes #[[ATTR6:[0-9]+]] = { argmemonly nofree nounwind willreturn }
+; IS__CGSCC____: attributes #[[ATTR7]] = { norecurse nosync readnone }
+; IS__CGSCC____: attributes #[[ATTR8]] = { nofree nosync nounwind readnone }
+; IS__CGSCC____: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind readnone }
+; IS__CGSCC____: attributes #[[ATTR10]] = { null_pointer_is_valid }
+; IS__CGSCC____: attributes #[[ATTR11]] = { willreturn }
 ;.

diff  --git a/llvm/test/Transforms/Attributor/noundef.ll b/llvm/test/Transforms/Attributor/noundef.ll
index b42c1777c947..5d1ae2974aa3 100644
--- a/llvm/test/Transforms/Attributor/noundef.ll
+++ b/llvm/test/Transforms/Attributor/noundef.ll
@@ -87,7 +87,6 @@ define void @caller_with_unused_arg(i1 %c) {
 }
 
 define internal void @callee_with_dead_arg(i1 %create, ...) {
-;
 ; CHECK-LABEL: define {{[^@]+}}@callee_with_dead_arg
 ; CHECK-SAME: (i1 [[CREATE:%.*]], ...) {
 ; CHECK-NEXT:  entry:
@@ -114,7 +113,6 @@ if.then3:                                         ; preds = %entry
 ; try to come up with a 
diff erent scheme to verify the `noundef` is dropped if
 ; signature rewriting is not happening.
 define void @caller_with_noundef_arg() {
-;
 ; CHECK-LABEL: define {{[^@]+}}@caller_with_noundef_arg() {
 ; CHECK-NEXT:    call void (i1, ...) @callee_with_dead_arg(i1 undef)
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/Attributor/potential.ll b/llvm/test/Transforms/Attributor/potential.ll
index 084d94a524c6..f2b414092f0a 100644
--- a/llvm/test/Transforms/Attributor/potential.ll
+++ b/llvm/test/Transforms/Attributor/potential.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -enable-new-pm=0 -attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=23 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=23 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -enable-new-pm=0 -attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=24 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=24 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -enable-new-pm=0 -attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 ;
@@ -100,10 +100,10 @@ define i32 @potential_test2(i1 %c) {
 define internal i32 @iszero3(i32 %c) {
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@iszero3
-; IS__CGSCC____-SAME: (i32 [[C:%.*]]) #[[ATTR0]] {
-; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp eq i32 undef, 0
-; IS__CGSCC____-NEXT:    [[RET:%.*]] = zext i1 undef to i32
-; IS__CGSCC____-NEXT:    ret i32 undef
+; IS__CGSCC____-SAME: (i32 noundef [[C:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C]], 0
+; IS__CGSCC____-NEXT:    [[RET:%.*]] = zext i1 [[CMP]] to i32
+; IS__CGSCC____-NEXT:    ret i32 [[RET]]
 ;
   %cmp = icmp eq i32 %c, 0
   %ret = zext i1 %cmp to i32
@@ -113,10 +113,10 @@ define internal i32 @iszero3(i32 %c) {
 define internal i32 @less_than_two(i32 %c) {
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@less_than_two
-; IS__CGSCC____-SAME: (i32 [[C:%.*]]) #[[ATTR0]] {
-; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp slt i32 undef, 2
+; IS__CGSCC____-SAME: (i32 noundef [[C:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp slt i32 [[C]], 2
 ; IS__CGSCC____-NEXT:    [[RET:%.*]] = zext i1 true to i32
-; IS__CGSCC____-NEXT:    ret i32 undef
+; IS__CGSCC____-NEXT:    ret i32 1
 ;
   %cmp = icmp slt i32 %c, 2
   %ret = zext i1 %cmp to i32
@@ -568,15 +568,25 @@ f:
 
 ; FIXME: returned value can be simplified to 0
 define i32 @potential_test11(i1 %c) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@potential_test11
-; IS__TUNIT____-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
-; IS__TUNIT____-NEXT:    [[ZERO1:%.*]] = call i32 @optimize_undef_1(i1 [[C]]) #[[ATTR0]], !range [[RNG2:![0-9]+]]
-; IS__TUNIT____-NEXT:    [[ZERO2:%.*]] = call i32 @optimize_undef_2(i1 [[C]]) #[[ATTR0]], !range [[RNG3:![0-9]+]]
-; IS__TUNIT____-NEXT:    [[ZERO3:%.*]] = call i32 @optimize_undef_3(i1 [[C]]) #[[ATTR0]], !range [[RNG2]]
-; IS__TUNIT____-NEXT:    [[ACC1:%.*]] = add i32 [[ZERO1]], [[ZERO2]]
-; IS__TUNIT____-NEXT:    [[ACC2:%.*]] = add i32 [[ACC1]], [[ZERO3]]
-; IS__TUNIT____-NEXT:    ret i32 [[ACC2]]
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@potential_test11
+; IS__TUNIT_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
+; IS__TUNIT_OPM-NEXT:    [[ZERO1:%.*]] = call i32 @optimize_undef_1(i1 [[C]]) #[[ATTR0]], !range [[RNG2:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    [[ZERO2:%.*]] = call i32 @optimize_undef_2(i1 [[C]]) #[[ATTR0]], !range [[RNG3:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    [[ZERO3:%.*]] = call i32 @optimize_undef_3(i1 [[C]]) #[[ATTR0]]
+; IS__TUNIT_OPM-NEXT:    [[ACC1:%.*]] = add i32 [[ZERO1]], [[ZERO2]]
+; IS__TUNIT_OPM-NEXT:    [[ACC2:%.*]] = add i32 [[ACC1]], [[ZERO3]]
+; IS__TUNIT_OPM-NEXT:    ret i32 [[ACC2]]
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@potential_test11
+; IS__TUNIT_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
+; IS__TUNIT_NPM-NEXT:    [[ZERO1:%.*]] = call i32 @optimize_undef_1(i1 [[C]]) #[[ATTR0]], !range [[RNG2:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    [[ZERO2:%.*]] = call i32 @optimize_undef_2(i1 [[C]]) #[[ATTR0]], !range [[RNG3:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    [[ZERO3:%.*]] = call i32 @optimize_undef_3(i1 [[C]]) #[[ATTR0]], !range [[RNG2]]
+; IS__TUNIT_NPM-NEXT:    [[ACC1:%.*]] = add i32 [[ZERO1]], [[ZERO2]]
+; IS__TUNIT_NPM-NEXT:    [[ACC2:%.*]] = add i32 [[ACC1]], [[ZERO3]]
+; IS__TUNIT_NPM-NEXT:    ret i32 [[ACC2]]
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@potential_test11
@@ -623,7 +633,7 @@ define i32 @optimize_poison_1(i1 %c) {
 ; IS__TUNIT_NPM:       t:
 ; IS__TUNIT_NPM-NEXT:    ret i32 0
 ; IS__TUNIT_NPM:       f:
-; IS__TUNIT_NPM-NEXT:    ret i32 undef
+; IS__TUNIT_NPM-NEXT:    ret i32 0
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@optimize_poison_1
@@ -641,7 +651,7 @@ define i32 @optimize_poison_1(i1 %c) {
 ; IS__CGSCC_NPM:       t:
 ; IS__CGSCC_NPM-NEXT:    ret i32 0
 ; IS__CGSCC_NPM:       f:
-; IS__CGSCC_NPM-NEXT:    ret i32 undef
+; IS__CGSCC_NPM-NEXT:    ret i32 0
 ;
   br i1 %c, label %t, label %f
 t:
@@ -667,7 +677,7 @@ define i32 @potential_test12(i1 %c) {
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@potential_test12
 ; IS__CGSCC_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
-; IS__CGSCC_OPM-NEXT:    [[ZERO:%.*]] = call noundef i32 @optimize_poison_1(i1 [[C]]) #[[ATTR2]], !range [[RNG3]]
+; IS__CGSCC_OPM-NEXT:    [[ZERO:%.*]] = call i32 @optimize_poison_1(i1 [[C]]) #[[ATTR2]], !range [[RNG3]]
 ; IS__CGSCC_OPM-NEXT:    ret i32 [[ZERO]]
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
@@ -705,7 +715,7 @@ define i32 @potential_test13_caller1() {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@potential_test13_caller1
 ; IS__TUNIT____-SAME: () #[[ATTR0]] {
-; IS__TUNIT____-NEXT:    [[RET:%.*]] = call i32 @potential_test13_callee(i32 noundef 0) #[[ATTR0]], !range [[RNG2]]
+; IS__TUNIT____-NEXT:    [[RET:%.*]] = call i32 @potential_test13_callee(i32 noundef 0) #[[ATTR0]], !range [[RNG2:![0-9]+]]
 ; IS__TUNIT____-NEXT:    ret i32 [[RET]]
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn

diff  --git a/llvm/test/Transforms/Attributor/range.ll b/llvm/test/Transforms/Attributor/range.ll
index dcca3a6292d3..8205bf68c563 100644
--- a/llvm/test/Transforms/Attributor/range.ll
+++ b/llvm/test/Transforms/Attributor/range.ll
@@ -435,9 +435,9 @@ entry:
 }
 
 define i32 @test2_check(i32* %p) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@test2_check
-; IS__TUNIT____-SAME: (i32* nocapture nofree readnone align 4 [[P:%.*]]) #[[ATTR1:[0-9]+]] {
+; IS__TUNIT____-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    br label [[IF_THEN:%.*]]
 ; IS__TUNIT____:       if.then:
@@ -447,9 +447,9 @@ define i32 @test2_check(i32* %p) {
 ; IS__TUNIT____:       return:
 ; IS__TUNIT____-NEXT:    ret i32 2
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test2_check
-; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readnone align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR1:[0-9]+]] {
+; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    br label [[IF_THEN:%.*]]
 ; IS__CGSCC____:       if.then:
@@ -505,7 +505,7 @@ declare dso_local void @unkown()
 define internal i32 @r1(i32) local_unnamed_addr {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@r1
-; IS__TUNIT_OPM-SAME: () local_unnamed_addr #[[ATTR2:[0-9]+]] {
+; IS__TUNIT_OPM-SAME: () local_unnamed_addr #[[ATTR1:[0-9]+]] {
 ; IS__TUNIT_OPM-NEXT:    br label [[TMP4:%.*]]
 ; IS__TUNIT_OPM:       1:
 ; IS__TUNIT_OPM-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[TMP7:%.*]], 10000
@@ -524,7 +524,7 @@ define internal i32 @r1(i32) local_unnamed_addr {
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@r1
-; IS__CGSCC_OPM-SAME: () local_unnamed_addr #[[ATTR2:[0-9]+]] {
+; IS__CGSCC_OPM-SAME: () local_unnamed_addr #[[ATTR1:[0-9]+]] {
 ; IS__CGSCC_OPM-NEXT:    br label [[TMP4:%.*]]
 ; IS__CGSCC_OPM:       1:
 ; IS__CGSCC_OPM-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[TMP7:%.*]], 10000
@@ -543,7 +543,7 @@ define internal i32 @r1(i32) local_unnamed_addr {
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@r1
-; IS__CGSCC_NPM-SAME: () local_unnamed_addr #[[ATTR1]] {
+; IS__CGSCC_NPM-SAME: () local_unnamed_addr #[[ATTR1:[0-9]+]] {
 ; IS__CGSCC_NPM-NEXT:    br label [[TMP3:%.*]]
 ; IS__CGSCC_NPM:       1:
 ; IS__CGSCC_NPM-NEXT:    br label [[F:%.*]]
@@ -580,7 +580,7 @@ f:
 define void @f1(i32){
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@f1
 ; IS__TUNIT_OPM-SAME: (i32 [[TMP0:%.*]]) {
-; IS__TUNIT_OPM-NEXT:    [[TMP2:%.*]] = tail call i32 @r1() #[[ATTR2]]
+; IS__TUNIT_OPM-NEXT:    [[TMP2:%.*]] = tail call i32 @r1() #[[ATTR1]]
 ; IS__TUNIT_OPM-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], 15
 ; IS__TUNIT_OPM-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
 ; IS__TUNIT_OPM:       4:
@@ -591,7 +591,7 @@ define void @f1(i32){
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@f1
-; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
 ; IS__TUNIT_NPM-NEXT:    br label [[TMP3:%.*]]
 ; IS__TUNIT_NPM:       2:
 ; IS__TUNIT_NPM-NEXT:    unreachable
@@ -612,10 +612,11 @@ define void @f1(i32){
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f1
 ; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]]) #[[ATTR1]] {
-; IS__CGSCC_NPM-NEXT:    br label [[TMP3:%.*]]
-; IS__CGSCC_NPM:       2:
-; IS__CGSCC_NPM-NEXT:    unreachable
+; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = icmp sgt i32 10, 15
+; IS__CGSCC_NPM-NEXT:    br i1 false, label [[TMP3:%.*]], label [[TMP4:%.*]]
 ; IS__CGSCC_NPM:       3:
+; IS__CGSCC_NPM-NEXT:    unreachable
+; IS__CGSCC_NPM:       4:
 ; IS__CGSCC_NPM-NEXT:    ret void
 ;
   %2 = tail call i32 @r1(i32 %0)
@@ -641,29 +642,53 @@ define void @f1(i32){
 ;   }
 ; }
 define dso_local i32 @test4-f1(i32 %u) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@test4-f1
-; IS__TUNIT____-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[U]], -1
-; IS__TUNIT____-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
-; IS__TUNIT____:       if.then:
-; IS__TUNIT____-NEXT:    br label [[RETURN]]
-; IS__TUNIT____:       return:
-; IS__TUNIT____-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[U]], [[IF_THEN]] ], [ 0, [[ENTRY:%.*]] ]
-; IS__TUNIT____-NEXT:    ret i32 [[RETVAL_0]]
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test4-f1
+; IS__TUNIT_OPM-SAME: (i32 [[U:%.*]]) #[[ATTR2:[0-9]+]] {
+; IS__TUNIT_OPM-NEXT:  entry:
+; IS__TUNIT_OPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[U]], -1
+; IS__TUNIT_OPM-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; IS__TUNIT_OPM:       if.then:
+; IS__TUNIT_OPM-NEXT:    br label [[RETURN]]
+; IS__TUNIT_OPM:       return:
+; IS__TUNIT_OPM-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[U]], [[IF_THEN]] ], [ 0, [[ENTRY:%.*]] ]
+; IS__TUNIT_OPM-NEXT:    ret i32 [[RETVAL_0]]
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@test4-f1
-; IS__CGSCC____-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[U]], -1
-; IS__CGSCC____-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
-; IS__CGSCC____:       if.then:
-; IS__CGSCC____-NEXT:    br label [[RETURN]]
-; IS__CGSCC____:       return:
-; IS__CGSCC____-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[U]], [[IF_THEN]] ], [ 0, [[ENTRY:%.*]] ]
-; IS__CGSCC____-NEXT:    ret i32 [[RETVAL_0]]
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test4-f1
+; IS__TUNIT_NPM-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[U]], -1
+; IS__TUNIT_NPM-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; IS__TUNIT_NPM:       if.then:
+; IS__TUNIT_NPM-NEXT:    br label [[RETURN]]
+; IS__TUNIT_NPM:       return:
+; IS__TUNIT_NPM-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[U]], [[IF_THEN]] ], [ 0, [[ENTRY:%.*]] ]
+; IS__TUNIT_NPM-NEXT:    ret i32 [[RETVAL_0]]
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test4-f1
+; IS__CGSCC_OPM-SAME: (i32 [[U:%.*]]) #[[ATTR2:[0-9]+]] {
+; IS__CGSCC_OPM-NEXT:  entry:
+; IS__CGSCC_OPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[U]], -1
+; IS__CGSCC_OPM-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; IS__CGSCC_OPM:       if.then:
+; IS__CGSCC_OPM-NEXT:    br label [[RETURN]]
+; IS__CGSCC_OPM:       return:
+; IS__CGSCC_OPM-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[U]], [[IF_THEN]] ], [ 0, [[ENTRY:%.*]] ]
+; IS__CGSCC_OPM-NEXT:    ret i32 [[RETVAL_0]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test4-f1
+; IS__CGSCC_NPM-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:  entry:
+; IS__CGSCC_NPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[U]], -1
+; IS__CGSCC_NPM-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[RETURN:%.*]]
+; IS__CGSCC_NPM:       if.then:
+; IS__CGSCC_NPM-NEXT:    br label [[RETURN]]
+; IS__CGSCC_NPM:       return:
+; IS__CGSCC_NPM-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[U]], [[IF_THEN]] ], [ 0, [[ENTRY:%.*]] ]
+; IS__CGSCC_NPM-NEXT:    ret i32 [[RETVAL_0]]
 ;
 ; FIXME: RETVAL_0 >= 0
 entry:
@@ -680,16 +705,23 @@ return:                                           ; preds = %entry, %if.then
 
 
 define dso_local i32 @test4-g1(i32 %u) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@test4-g1
-; IS__TUNIT____-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR1]]
-; IS__TUNIT____-NEXT:    ret i32 [[CALL]]
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test4-g1
+; IS__TUNIT_OPM-SAME: (i32 [[U:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:  entry:
+; IS__TUNIT_OPM-NEXT:    [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR2]]
+; IS__TUNIT_OPM-NEXT:    ret i32 [[CALL]]
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test4-g1
+; IS__TUNIT_NPM-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR1]]
+; IS__TUNIT_NPM-NEXT:    ret i32 [[CALL]]
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test4-g1
-; IS__CGSCC_OPM-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_OPM-SAME: (i32 [[U:%.*]]) #[[ATTR2]] {
 ; IS__CGSCC_OPM-NEXT:  entry:
 ; IS__CGSCC_OPM-NEXT:    [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR5:[0-9]+]]
 ; IS__CGSCC_OPM-NEXT:    ret i32 [[CALL]]
@@ -717,35 +749,65 @@ entry:
 ;   }
 ; }
 define dso_local i32 @test4-f2(i32 %u) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@test4-f2
-; IS__TUNIT____-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[U]], -1
-; IS__TUNIT____-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; IS__TUNIT____:       if.then:
-; IS__TUNIT____-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[U]], 1
-; IS__TUNIT____-NEXT:    br label [[RETURN:%.*]]
-; IS__TUNIT____:       if.else:
-; IS__TUNIT____-NEXT:    br label [[RETURN]]
-; IS__TUNIT____:       return:
-; IS__TUNIT____-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ 1, [[IF_ELSE]] ]
-; IS__TUNIT____-NEXT:    ret i32 [[RETVAL_0]]
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test4-f2
+; IS__TUNIT_OPM-SAME: (i32 [[U:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:  entry:
+; IS__TUNIT_OPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[U]], -1
+; IS__TUNIT_OPM-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; IS__TUNIT_OPM:       if.then:
+; IS__TUNIT_OPM-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[U]], 1
+; IS__TUNIT_OPM-NEXT:    br label [[RETURN:%.*]]
+; IS__TUNIT_OPM:       if.else:
+; IS__TUNIT_OPM-NEXT:    br label [[RETURN]]
+; IS__TUNIT_OPM:       return:
+; IS__TUNIT_OPM-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ 1, [[IF_ELSE]] ]
+; IS__TUNIT_OPM-NEXT:    ret i32 [[RETVAL_0]]
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@test4-f2
-; IS__CGSCC____-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[U]], -1
-; IS__CGSCC____-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-; IS__CGSCC____:       if.then:
-; IS__CGSCC____-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[U]], 1
-; IS__CGSCC____-NEXT:    br label [[RETURN:%.*]]
-; IS__CGSCC____:       if.else:
-; IS__CGSCC____-NEXT:    br label [[RETURN]]
-; IS__CGSCC____:       return:
-; IS__CGSCC____-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ 1, [[IF_ELSE]] ]
-; IS__CGSCC____-NEXT:    ret i32 [[RETVAL_0]]
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test4-f2
+; IS__TUNIT_NPM-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[U]], -1
+; IS__TUNIT_NPM-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; IS__TUNIT_NPM:       if.then:
+; IS__TUNIT_NPM-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[U]], 1
+; IS__TUNIT_NPM-NEXT:    br label [[RETURN:%.*]]
+; IS__TUNIT_NPM:       if.else:
+; IS__TUNIT_NPM-NEXT:    br label [[RETURN]]
+; IS__TUNIT_NPM:       return:
+; IS__TUNIT_NPM-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ 1, [[IF_ELSE]] ]
+; IS__TUNIT_NPM-NEXT:    ret i32 [[RETVAL_0]]
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test4-f2
+; IS__CGSCC_OPM-SAME: (i32 [[U:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:  entry:
+; IS__CGSCC_OPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[U]], -1
+; IS__CGSCC_OPM-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; IS__CGSCC_OPM:       if.then:
+; IS__CGSCC_OPM-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[U]], 1
+; IS__CGSCC_OPM-NEXT:    br label [[RETURN:%.*]]
+; IS__CGSCC_OPM:       if.else:
+; IS__CGSCC_OPM-NEXT:    br label [[RETURN]]
+; IS__CGSCC_OPM:       return:
+; IS__CGSCC_OPM-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ 1, [[IF_ELSE]] ]
+; IS__CGSCC_OPM-NEXT:    ret i32 [[RETVAL_0]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test4-f2
+; IS__CGSCC_NPM-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:  entry:
+; IS__CGSCC_NPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[U]], -1
+; IS__CGSCC_NPM-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; IS__CGSCC_NPM:       if.then:
+; IS__CGSCC_NPM-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[U]], 1
+; IS__CGSCC_NPM-NEXT:    br label [[RETURN:%.*]]
+; IS__CGSCC_NPM:       if.else:
+; IS__CGSCC_NPM-NEXT:    br label [[RETURN]]
+; IS__CGSCC_NPM:       return:
+; IS__CGSCC_NPM-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[ADD]], [[IF_THEN]] ], [ 1, [[IF_ELSE]] ]
+; IS__CGSCC_NPM-NEXT:    ret i32 [[RETVAL_0]]
 ;
 entry:
   %cmp = icmp sgt i32 %u, -1
@@ -767,9 +829,9 @@ return:                                           ; preds = %if.else, %if.then
 define dso_local i32 @test4-g2(i32 %u) {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test4-g2
-; IS__TUNIT_OPM-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_OPM-SAME: (i32 [[U:%.*]]) #[[ATTR2]] {
 ; IS__TUNIT_OPM-NEXT:  entry:
-; IS__TUNIT_OPM-NEXT:    [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #[[ATTR1]]
+; IS__TUNIT_OPM-NEXT:    [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #[[ATTR2]]
 ; IS__TUNIT_OPM-NEXT:    ret i32 [[CALL]]
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
@@ -781,7 +843,7 @@ define dso_local i32 @test4-g2(i32 %u) {
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test4-g2
-; IS__CGSCC_OPM-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_OPM-SAME: (i32 [[U:%.*]]) #[[ATTR2]] {
 ; IS__CGSCC_OPM-NEXT:  entry:
 ; IS__CGSCC_OPM-NEXT:    [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #[[ATTR5]]
 ; IS__CGSCC_OPM-NEXT:    ret i32 [[CALL]]
@@ -801,12 +863,12 @@ entry:
 define dso_local i32 @test-5() {
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test-5() {
 ; IS__TUNIT_OPM-NEXT:  entry:
-; IS__TUNIT_OPM-NEXT:    [[CALL:%.*]] = call noundef i32 @rec(i32 noundef 0), !range [[RNG3:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    [[CALL:%.*]] = call i32 @rec(i32 noundef 0), !range [[RNG3:![0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    ret i32 [[CALL]]
 ;
 ; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test-5() {
 ; NOT_TUNIT_OPM-NEXT:  entry:
-; NOT_TUNIT_OPM-NEXT:    [[CALL:%.*]] = call noundef i32 @rec(i32 noundef 0), !range [[RNG4:![0-9]+]]
+; NOT_TUNIT_OPM-NEXT:    [[CALL:%.*]] = call i32 @rec(i32 noundef 0), !range [[RNG4:![0-9]+]]
 ; NOT_TUNIT_OPM-NEXT:    ret i32 [[CALL]]
 ;
 entry:
@@ -814,47 +876,26 @@ entry:
   ret i32 %call
 }
 define internal i32 @rec(i32 %depth) {
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@rec
-; IS__TUNIT_OPM-SAME: (i32 [[DEPTH:%.*]]) {
-; IS__TUNIT_OPM-NEXT:  entry:
-; IS__TUNIT_OPM-NEXT:    [[CALL:%.*]] = call i32 @foo(i32 [[DEPTH]])
-; IS__TUNIT_OPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[CALL]], 0
-; IS__TUNIT_OPM-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; IS__TUNIT_OPM:       if.then:
-; IS__TUNIT_OPM-NEXT:    br label [[RETURN:%.*]]
-; IS__TUNIT_OPM:       if.end:
-; IS__TUNIT_OPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[DEPTH]], 10
-; IS__TUNIT_OPM-NEXT:    br i1 [[CMP]], label [[IF_THEN1:%.*]], label [[IF_END3:%.*]]
-; IS__TUNIT_OPM:       if.then1:
-; IS__TUNIT_OPM-NEXT:    [[ADD:%.*]] = add nsw i32 [[DEPTH]], 1
-; IS__TUNIT_OPM-NEXT:    [[CALL2:%.*]] = call i32 @rec(i32 [[ADD]]), !range [[RNG3]]
-; IS__TUNIT_OPM-NEXT:    br label [[IF_END3]]
-; IS__TUNIT_OPM:       if.end3:
-; IS__TUNIT_OPM-NEXT:    br label [[RETURN]]
-; IS__TUNIT_OPM:       return:
-; IS__TUNIT_OPM-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ 1, [[IF_END3]] ]
-; IS__TUNIT_OPM-NEXT:    ret i32 [[RETVAL_0]]
-;
-; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@rec
-; NOT_TUNIT_OPM-SAME: (i32 [[DEPTH:%.*]]) {
-; NOT_TUNIT_OPM-NEXT:  entry:
-; NOT_TUNIT_OPM-NEXT:    [[CALL:%.*]] = call i32 @foo(i32 [[DEPTH]])
-; NOT_TUNIT_OPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[CALL]], 0
-; NOT_TUNIT_OPM-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; NOT_TUNIT_OPM:       if.then:
-; NOT_TUNIT_OPM-NEXT:    br label [[RETURN:%.*]]
-; NOT_TUNIT_OPM:       if.end:
-; NOT_TUNIT_OPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[DEPTH]], 10
-; NOT_TUNIT_OPM-NEXT:    br i1 [[CMP]], label [[IF_THEN1:%.*]], label [[IF_END3:%.*]]
-; NOT_TUNIT_OPM:       if.then1:
-; NOT_TUNIT_OPM-NEXT:    [[ADD:%.*]] = add nsw i32 [[DEPTH]], 1
-; NOT_TUNIT_OPM-NEXT:    [[CALL2:%.*]] = call i32 @rec(i32 [[ADD]]), !range [[RNG4]]
-; NOT_TUNIT_OPM-NEXT:    br label [[IF_END3]]
-; NOT_TUNIT_OPM:       if.end3:
-; NOT_TUNIT_OPM-NEXT:    br label [[RETURN]]
-; NOT_TUNIT_OPM:       return:
-; NOT_TUNIT_OPM-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ 1, [[IF_END3]] ]
-; NOT_TUNIT_OPM-NEXT:    ret i32 [[RETVAL_0]]
+; CHECK-LABEL: define {{[^@]+}}@rec
+; CHECK-SAME: (i32 [[DEPTH:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @foo(i32 [[DEPTH]])
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[CALL]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[RETURN:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[DEPTH]], 10
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN1:%.*]], label [[IF_END3:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[DEPTH]], 1
+; CHECK-NEXT:    [[CALL2:%.*]] = call i32 @rec(i32 [[ADD]])
+; CHECK-NEXT:    br label [[IF_END3]]
+; CHECK:       if.end3:
+; CHECK-NEXT:    br label [[RETURN]]
+; CHECK:       return:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ 1, [[IF_END3]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
 ;
 entry:
   %call = call i32 @foo(i32 %depth)
@@ -891,57 +932,109 @@ declare dso_local i32 @foo(i32)
 
 ; FIXME: All but the return is not needed anymore
 define dso_local zeroext i1 @phi(i32 %arg) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@phi
-; IS__TUNIT____-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:  bb:
-; IS__TUNIT____-NEXT:    [[TMP:%.*]] = icmp sgt i32 [[ARG]], 5
-; IS__TUNIT____-NEXT:    br i1 [[TMP]], label [[BB1:%.*]], label [[BB2:%.*]]
-; IS__TUNIT____:       bb1:
-; IS__TUNIT____-NEXT:    br label [[BB3:%.*]]
-; IS__TUNIT____:       bb2:
-; IS__TUNIT____-NEXT:    br label [[BB3]]
-; IS__TUNIT____:       bb3:
-; IS__TUNIT____-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[ARG]], 10
-; IS__TUNIT____-NEXT:    br i1 [[TMP4]], label [[BB5:%.*]], label [[BB7:%.*]]
-; IS__TUNIT____:       bb5:
-; IS__TUNIT____-NEXT:    br label [[BB9:%.*]]
-; IS__TUNIT____:       bb7:
-; IS__TUNIT____-NEXT:    br label [[BB9]]
-; IS__TUNIT____:       bb9:
-; IS__TUNIT____-NEXT:    br label [[BB12:%.*]]
-; IS__TUNIT____:       bb11:
-; IS__TUNIT____-NEXT:    unreachable
-; IS__TUNIT____:       bb12:
-; IS__TUNIT____-NEXT:    br label [[BB13:%.*]]
-; IS__TUNIT____:       bb13:
-; IS__TUNIT____-NEXT:    ret i1 false
-;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@phi
-; IS__CGSCC____-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:  bb:
-; IS__CGSCC____-NEXT:    [[TMP:%.*]] = icmp sgt i32 [[ARG]], 5
-; IS__CGSCC____-NEXT:    br i1 [[TMP]], label [[BB1:%.*]], label [[BB2:%.*]]
-; IS__CGSCC____:       bb1:
-; IS__CGSCC____-NEXT:    br label [[BB3:%.*]]
-; IS__CGSCC____:       bb2:
-; IS__CGSCC____-NEXT:    br label [[BB3]]
-; IS__CGSCC____:       bb3:
-; IS__CGSCC____-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[ARG]], 10
-; IS__CGSCC____-NEXT:    br i1 [[TMP4]], label [[BB5:%.*]], label [[BB7:%.*]]
-; IS__CGSCC____:       bb5:
-; IS__CGSCC____-NEXT:    br label [[BB9:%.*]]
-; IS__CGSCC____:       bb7:
-; IS__CGSCC____-NEXT:    br label [[BB9]]
-; IS__CGSCC____:       bb9:
-; IS__CGSCC____-NEXT:    br label [[BB12:%.*]]
-; IS__CGSCC____:       bb11:
-; IS__CGSCC____-NEXT:    unreachable
-; IS__CGSCC____:       bb12:
-; IS__CGSCC____-NEXT:    br label [[BB13:%.*]]
-; IS__CGSCC____:       bb13:
-; IS__CGSCC____-NEXT:    ret i1 false
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@phi
+; IS__TUNIT_OPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:  bb:
+; IS__TUNIT_OPM-NEXT:    [[TMP:%.*]] = icmp sgt i32 [[ARG]], 5
+; IS__TUNIT_OPM-NEXT:    br i1 [[TMP]], label [[BB1:%.*]], label [[BB2:%.*]]
+; IS__TUNIT_OPM:       bb1:
+; IS__TUNIT_OPM-NEXT:    br label [[BB3:%.*]]
+; IS__TUNIT_OPM:       bb2:
+; IS__TUNIT_OPM-NEXT:    br label [[BB3]]
+; IS__TUNIT_OPM:       bb3:
+; IS__TUNIT_OPM-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[ARG]], 10
+; IS__TUNIT_OPM-NEXT:    br i1 [[TMP4]], label [[BB5:%.*]], label [[BB7:%.*]]
+; IS__TUNIT_OPM:       bb5:
+; IS__TUNIT_OPM-NEXT:    br label [[BB9:%.*]]
+; IS__TUNIT_OPM:       bb7:
+; IS__TUNIT_OPM-NEXT:    br label [[BB9]]
+; IS__TUNIT_OPM:       bb9:
+; IS__TUNIT_OPM-NEXT:    br label [[BB12:%.*]]
+; IS__TUNIT_OPM:       bb11:
+; IS__TUNIT_OPM-NEXT:    unreachable
+; IS__TUNIT_OPM:       bb12:
+; IS__TUNIT_OPM-NEXT:    br label [[BB13:%.*]]
+; IS__TUNIT_OPM:       bb13:
+; IS__TUNIT_OPM-NEXT:    ret i1 false
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@phi
+; IS__TUNIT_NPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:  bb:
+; IS__TUNIT_NPM-NEXT:    [[TMP:%.*]] = icmp sgt i32 [[ARG]], 5
+; IS__TUNIT_NPM-NEXT:    br i1 [[TMP]], label [[BB1:%.*]], label [[BB2:%.*]]
+; IS__TUNIT_NPM:       bb1:
+; IS__TUNIT_NPM-NEXT:    br label [[BB3:%.*]]
+; IS__TUNIT_NPM:       bb2:
+; IS__TUNIT_NPM-NEXT:    br label [[BB3]]
+; IS__TUNIT_NPM:       bb3:
+; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[ARG]], 10
+; IS__TUNIT_NPM-NEXT:    br i1 [[TMP4]], label [[BB5:%.*]], label [[BB7:%.*]]
+; IS__TUNIT_NPM:       bb5:
+; IS__TUNIT_NPM-NEXT:    br label [[BB9:%.*]]
+; IS__TUNIT_NPM:       bb7:
+; IS__TUNIT_NPM-NEXT:    br label [[BB9]]
+; IS__TUNIT_NPM:       bb9:
+; IS__TUNIT_NPM-NEXT:    br label [[BB12:%.*]]
+; IS__TUNIT_NPM:       bb11:
+; IS__TUNIT_NPM-NEXT:    unreachable
+; IS__TUNIT_NPM:       bb12:
+; IS__TUNIT_NPM-NEXT:    br label [[BB13:%.*]]
+; IS__TUNIT_NPM:       bb13:
+; IS__TUNIT_NPM-NEXT:    ret i1 false
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@phi
+; IS__CGSCC_OPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:  bb:
+; IS__CGSCC_OPM-NEXT:    [[TMP:%.*]] = icmp sgt i32 [[ARG]], 5
+; IS__CGSCC_OPM-NEXT:    br i1 [[TMP]], label [[BB1:%.*]], label [[BB2:%.*]]
+; IS__CGSCC_OPM:       bb1:
+; IS__CGSCC_OPM-NEXT:    br label [[BB3:%.*]]
+; IS__CGSCC_OPM:       bb2:
+; IS__CGSCC_OPM-NEXT:    br label [[BB3]]
+; IS__CGSCC_OPM:       bb3:
+; IS__CGSCC_OPM-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[ARG]], 10
+; IS__CGSCC_OPM-NEXT:    br i1 [[TMP4]], label [[BB5:%.*]], label [[BB7:%.*]]
+; IS__CGSCC_OPM:       bb5:
+; IS__CGSCC_OPM-NEXT:    br label [[BB9:%.*]]
+; IS__CGSCC_OPM:       bb7:
+; IS__CGSCC_OPM-NEXT:    br label [[BB9]]
+; IS__CGSCC_OPM:       bb9:
+; IS__CGSCC_OPM-NEXT:    br label [[BB12:%.*]]
+; IS__CGSCC_OPM:       bb11:
+; IS__CGSCC_OPM-NEXT:    unreachable
+; IS__CGSCC_OPM:       bb12:
+; IS__CGSCC_OPM-NEXT:    br label [[BB13:%.*]]
+; IS__CGSCC_OPM:       bb13:
+; IS__CGSCC_OPM-NEXT:    ret i1 false
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@phi
+; IS__CGSCC_NPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:  bb:
+; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = icmp sgt i32 [[ARG]], 5
+; IS__CGSCC_NPM-NEXT:    br i1 [[TMP]], label [[BB1:%.*]], label [[BB2:%.*]]
+; IS__CGSCC_NPM:       bb1:
+; IS__CGSCC_NPM-NEXT:    br label [[BB3:%.*]]
+; IS__CGSCC_NPM:       bb2:
+; IS__CGSCC_NPM-NEXT:    br label [[BB3]]
+; IS__CGSCC_NPM:       bb3:
+; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[ARG]], 10
+; IS__CGSCC_NPM-NEXT:    br i1 [[TMP4]], label [[BB5:%.*]], label [[BB7:%.*]]
+; IS__CGSCC_NPM:       bb5:
+; IS__CGSCC_NPM-NEXT:    br label [[BB9:%.*]]
+; IS__CGSCC_NPM:       bb7:
+; IS__CGSCC_NPM-NEXT:    br label [[BB9]]
+; IS__CGSCC_NPM:       bb9:
+; IS__CGSCC_NPM-NEXT:    br label [[BB12:%.*]]
+; IS__CGSCC_NPM:       bb11:
+; IS__CGSCC_NPM-NEXT:    unreachable
+; IS__CGSCC_NPM:       bb12:
+; IS__CGSCC_NPM-NEXT:    br label [[BB13:%.*]]
+; IS__CGSCC_NPM:       bb13:
+; IS__CGSCC_NPM-NEXT:    ret i1 false
 ;
 bb:
   %tmp = icmp sgt i32 %arg, 5
@@ -983,17 +1076,29 @@ bb13:                                             ; preds = %bb12, %bb11
 }
 
 define dso_local i1 @select(i32 %a) local_unnamed_addr #0 {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@select
-; IS__TUNIT____-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    ret i1 false
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@select
+; IS__TUNIT_OPM-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:  entry:
+; IS__TUNIT_OPM-NEXT:    ret i1 false
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@select
-; IS__CGSCC____-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {
-; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    ret i1 false
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@select
+; IS__TUNIT_NPM-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    ret i1 false
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@select
+; IS__CGSCC_OPM-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:  entry:
+; IS__CGSCC_OPM-NEXT:    ret i1 false
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@select
+; IS__CGSCC_NPM-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:  entry:
+; IS__CGSCC_NPM-NEXT:    ret i1 false
 ;
 entry:
   %cmp = icmp sgt i32 %a, 5
@@ -1006,17 +1111,29 @@ entry:
 }
 
 define dso_local i32 @select_zext(i32 %a) local_unnamed_addr #0 {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@select_zext
-; IS__TUNIT____-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    ret i32 0
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@select_zext
+; IS__TUNIT_OPM-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:  entry:
+; IS__TUNIT_OPM-NEXT:    ret i32 0
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@select_zext
-; IS__CGSCC____-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {
-; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    ret i32 0
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@select_zext
+; IS__TUNIT_NPM-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    ret i32 0
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@select_zext
+; IS__CGSCC_OPM-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:  entry:
+; IS__CGSCC_OPM-NEXT:    ret i32 0
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@select_zext
+; IS__CGSCC_NPM-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:  entry:
+; IS__CGSCC_NPM-NEXT:    ret i32 0
 ;
 entry:
   %cmp = icmp sgt i32 %a, 5
@@ -1031,23 +1148,41 @@ entry:
 
 ; FIXME: We do not look through the ptr casts here.
 define dso_local i64 @select_int2ptr_bitcast_ptr2int(i32 %a) local_unnamed_addr #0 {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@select_int2ptr_bitcast_ptr2int
-; IS__TUNIT____-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[I2P:%.*]] = inttoptr i1 false to i1*
-; IS__TUNIT____-NEXT:    [[BC:%.*]] = bitcast i1* [[I2P]] to i32*
-; IS__TUNIT____-NEXT:    [[P2I:%.*]] = ptrtoint i32* [[BC]] to i64
-; IS__TUNIT____-NEXT:    ret i64 [[P2I]]
-;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@select_int2ptr_bitcast_ptr2int
-; IS__CGSCC____-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {
-; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[I2P:%.*]] = inttoptr i1 false to i1*
-; IS__CGSCC____-NEXT:    [[BC:%.*]] = bitcast i1* [[I2P]] to i32*
-; IS__CGSCC____-NEXT:    [[P2I:%.*]] = ptrtoint i32* [[BC]] to i64
-; IS__CGSCC____-NEXT:    ret i64 [[P2I]]
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@select_int2ptr_bitcast_ptr2int
+; IS__TUNIT_OPM-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:  entry:
+; IS__TUNIT_OPM-NEXT:    [[I2P:%.*]] = inttoptr i1 false to i1*
+; IS__TUNIT_OPM-NEXT:    [[BC:%.*]] = bitcast i1* [[I2P]] to i32*
+; IS__TUNIT_OPM-NEXT:    [[P2I:%.*]] = ptrtoint i32* [[BC]] to i64
+; IS__TUNIT_OPM-NEXT:    ret i64 [[P2I]]
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@select_int2ptr_bitcast_ptr2int
+; IS__TUNIT_NPM-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    [[I2P:%.*]] = inttoptr i1 false to i1*
+; IS__TUNIT_NPM-NEXT:    [[BC:%.*]] = bitcast i1* [[I2P]] to i32*
+; IS__TUNIT_NPM-NEXT:    [[P2I:%.*]] = ptrtoint i32* [[BC]] to i64
+; IS__TUNIT_NPM-NEXT:    ret i64 [[P2I]]
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@select_int2ptr_bitcast_ptr2int
+; IS__CGSCC_OPM-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:  entry:
+; IS__CGSCC_OPM-NEXT:    [[I2P:%.*]] = inttoptr i1 false to i1*
+; IS__CGSCC_OPM-NEXT:    [[BC:%.*]] = bitcast i1* [[I2P]] to i32*
+; IS__CGSCC_OPM-NEXT:    [[P2I:%.*]] = ptrtoint i32* [[BC]] to i64
+; IS__CGSCC_OPM-NEXT:    ret i64 [[P2I]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@select_int2ptr_bitcast_ptr2int
+; IS__CGSCC_NPM-SAME: (i32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:  entry:
+; IS__CGSCC_NPM-NEXT:    [[I2P:%.*]] = inttoptr i1 false to i1*
+; IS__CGSCC_NPM-NEXT:    [[BC:%.*]] = bitcast i1* [[I2P]] to i32*
+; IS__CGSCC_NPM-NEXT:    [[P2I:%.*]] = ptrtoint i32* [[BC]] to i64
+; IS__CGSCC_NPM-NEXT:    ret i64 [[P2I]]
 ;
 entry:
   %cmp = icmp sgt i32 %a, 5
@@ -1065,97 +1200,165 @@ entry:
 ; }
 
 define i1 @f_fcmp(float %a, float %b) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@f_fcmp
-; IS__TUNIT____-SAME: (float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    [[R:%.*]] = fcmp uge float [[A]], [[B]]
-; IS__TUNIT____-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
-; IS__TUNIT____-NEXT:    ret i1 [[S]]
-;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@f_fcmp
-; IS__CGSCC____-SAME: (float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    [[R:%.*]] = fcmp uge float [[A]], [[B]]
-; IS__CGSCC____-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
-; IS__CGSCC____-NEXT:    ret i1 [[S]]
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@f_fcmp
+; IS__TUNIT_OPM-SAME: (float [[A:%.*]], float [[B:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    [[R:%.*]] = fcmp uge float [[A]], [[B]]
+; IS__TUNIT_OPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__TUNIT_OPM-NEXT:    ret i1 [[S]]
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@f_fcmp
+; IS__TUNIT_NPM-SAME: (float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    [[R:%.*]] = fcmp uge float [[A]], [[B]]
+; IS__TUNIT_NPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__TUNIT_NPM-NEXT:    ret i1 [[S]]
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f_fcmp
+; IS__CGSCC_OPM-SAME: (float [[A:%.*]], float [[B:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    [[R:%.*]] = fcmp uge float [[A]], [[B]]
+; IS__CGSCC_OPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__CGSCC_OPM-NEXT:    ret i1 [[S]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f_fcmp
+; IS__CGSCC_NPM-SAME: (float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[R:%.*]] = fcmp uge float [[A]], [[B]]
+; IS__CGSCC_NPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__CGSCC_NPM-NEXT:    ret i1 [[S]]
 ;
   %r = fcmp uge float %a, %b
   %s = select i1 %r, i1 %r, i1 0
   ret i1 %s
 }
 define i1 @d_fcmp(double %a, double %b) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@d_fcmp
-; IS__TUNIT____-SAME: (double [[A:%.*]], double [[B:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    [[R:%.*]] = fcmp oeq double [[A]], [[B]]
-; IS__TUNIT____-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
-; IS__TUNIT____-NEXT:    ret i1 [[S]]
-;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@d_fcmp
-; IS__CGSCC____-SAME: (double [[A:%.*]], double [[B:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    [[R:%.*]] = fcmp oeq double [[A]], [[B]]
-; IS__CGSCC____-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
-; IS__CGSCC____-NEXT:    ret i1 [[S]]
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@d_fcmp
+; IS__TUNIT_OPM-SAME: (double [[A:%.*]], double [[B:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    [[R:%.*]] = fcmp oeq double [[A]], [[B]]
+; IS__TUNIT_OPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__TUNIT_OPM-NEXT:    ret i1 [[S]]
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@d_fcmp
+; IS__TUNIT_NPM-SAME: (double [[A:%.*]], double [[B:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    [[R:%.*]] = fcmp oeq double [[A]], [[B]]
+; IS__TUNIT_NPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__TUNIT_NPM-NEXT:    ret i1 [[S]]
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@d_fcmp
+; IS__CGSCC_OPM-SAME: (double [[A:%.*]], double [[B:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    [[R:%.*]] = fcmp oeq double [[A]], [[B]]
+; IS__CGSCC_OPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__CGSCC_OPM-NEXT:    ret i1 [[S]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@d_fcmp
+; IS__CGSCC_NPM-SAME: (double [[A:%.*]], double [[B:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[R:%.*]] = fcmp oeq double [[A]], [[B]]
+; IS__CGSCC_NPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__CGSCC_NPM-NEXT:    ret i1 [[S]]
 ;
   %r = fcmp oeq double %a, %b
   %s = select i1 %r, i1 %r, i1 0
   ret i1 %s
 }
 define i1 @dp_icmp(double* %a, double* %b) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@dp_icmp
-; IS__TUNIT____-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone [[B:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    [[R:%.*]] = icmp sge double* [[A]], [[B]]
-; IS__TUNIT____-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
-; IS__TUNIT____-NEXT:    ret i1 [[S]]
-;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@dp_icmp
-; IS__CGSCC____-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone [[B:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    [[R:%.*]] = icmp sge double* [[A]], [[B]]
-; IS__CGSCC____-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
-; IS__CGSCC____-NEXT:    ret i1 [[S]]
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@dp_icmp
+; IS__TUNIT_OPM-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone [[B:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    [[R:%.*]] = icmp sge double* [[A]], [[B]]
+; IS__TUNIT_OPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__TUNIT_OPM-NEXT:    ret i1 [[S]]
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@dp_icmp
+; IS__TUNIT_NPM-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone [[B:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    [[R:%.*]] = icmp sge double* [[A]], [[B]]
+; IS__TUNIT_NPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__TUNIT_NPM-NEXT:    ret i1 [[S]]
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@dp_icmp
+; IS__CGSCC_OPM-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone [[B:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    [[R:%.*]] = icmp sge double* [[A]], [[B]]
+; IS__CGSCC_OPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__CGSCC_OPM-NEXT:    ret i1 [[S]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@dp_icmp
+; IS__CGSCC_NPM-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone [[B:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[R:%.*]] = icmp sge double* [[A]], [[B]]
+; IS__CGSCC_NPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__CGSCC_NPM-NEXT:    ret i1 [[S]]
 ;
   %r = icmp sge double* %a, %b
   %s = select i1 %r, i1 %r, i1 0
   ret i1 %s
 }
 define i1 @ip_icmp(i8* %a, i8* %b) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@ip_icmp
-; IS__TUNIT____-SAME: (i8* nofree readnone [[A:%.*]], i8* nofree readnone [[B:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    [[R:%.*]] = icmp ult i8* [[A]], [[B]]
-; IS__TUNIT____-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
-; IS__TUNIT____-NEXT:    ret i1 [[S]]
-;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@ip_icmp
-; IS__CGSCC____-SAME: (i8* nofree readnone [[A:%.*]], i8* nofree readnone [[B:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    [[R:%.*]] = icmp ult i8* [[A]], [[B]]
-; IS__CGSCC____-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
-; IS__CGSCC____-NEXT:    ret i1 [[S]]
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@ip_icmp
+; IS__TUNIT_OPM-SAME: (i8* nofree readnone [[A:%.*]], i8* nofree readnone [[B:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    [[R:%.*]] = icmp ult i8* [[A]], [[B]]
+; IS__TUNIT_OPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__TUNIT_OPM-NEXT:    ret i1 [[S]]
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@ip_icmp
+; IS__TUNIT_NPM-SAME: (i8* nofree readnone [[A:%.*]], i8* nofree readnone [[B:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    [[R:%.*]] = icmp ult i8* [[A]], [[B]]
+; IS__TUNIT_NPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__TUNIT_NPM-NEXT:    ret i1 [[S]]
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@ip_icmp
+; IS__CGSCC_OPM-SAME: (i8* nofree readnone [[A:%.*]], i8* nofree readnone [[B:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    [[R:%.*]] = icmp ult i8* [[A]], [[B]]
+; IS__CGSCC_OPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__CGSCC_OPM-NEXT:    ret i1 [[S]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@ip_icmp
+; IS__CGSCC_NPM-SAME: (i8* nofree readnone [[A:%.*]], i8* nofree readnone [[B:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[R:%.*]] = icmp ult i8* [[A]], [[B]]
+; IS__CGSCC_NPM-NEXT:    [[S:%.*]] = select i1 [[R]], i1 [[R]], i1 false
+; IS__CGSCC_NPM-NEXT:    ret i1 [[S]]
 ;
   %r = icmp ult i8* %a, %b
   %s = select i1 %r, i1 %r, i1 0
   ret i1 %s
 }
 define i1 @fcmp_caller(float %fa, float %fb, double %da, double %db, double* %dpa, double* %dpb, i8* %ipa, i8* %ipb) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@fcmp_caller
-; IS__TUNIT____-SAME: (float [[FA:%.*]], float [[FB:%.*]], double [[DA:%.*]], double [[DB:%.*]], double* nofree readnone [[DPA:%.*]], double* nofree readnone [[DPB:%.*]], i8* nofree readnone [[IPA:%.*]], i8* nofree readnone [[IPB:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR1]]
-; IS__TUNIT____-NEXT:    [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR1]]
-; IS__TUNIT____-NEXT:    [[R3:%.*]] = call i1 @dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR1]]
-; IS__TUNIT____-NEXT:    [[R4:%.*]] = call i1 @ip_icmp(i8* noalias nofree readnone [[IPA]], i8* noalias nofree readnone [[IPB]]) #[[ATTR1]]
-; IS__TUNIT____-NEXT:    [[O1:%.*]] = or i1 [[R1]], [[R2]]
-; IS__TUNIT____-NEXT:    [[O2:%.*]] = or i1 [[R3]], [[R4]]
-; IS__TUNIT____-NEXT:    [[O3:%.*]] = or i1 [[O1]], [[O2]]
-; IS__TUNIT____-NEXT:    ret i1 [[O3]]
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@fcmp_caller
+; IS__TUNIT_OPM-SAME: (float [[FA:%.*]], float [[FB:%.*]], double [[DA:%.*]], double [[DB:%.*]], double* nofree readnone [[DPA:%.*]], double* nofree readnone [[DPB:%.*]], i8* nofree readnone [[IPA:%.*]], i8* nofree readnone [[IPB:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR2]]
+; IS__TUNIT_OPM-NEXT:    [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR2]]
+; IS__TUNIT_OPM-NEXT:    [[R3:%.*]] = call i1 @dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR2]]
+; IS__TUNIT_OPM-NEXT:    [[R4:%.*]] = call i1 @ip_icmp(i8* noalias nofree readnone [[IPA]], i8* noalias nofree readnone [[IPB]]) #[[ATTR2]]
+; IS__TUNIT_OPM-NEXT:    [[O1:%.*]] = or i1 [[R1]], [[R2]]
+; IS__TUNIT_OPM-NEXT:    [[O2:%.*]] = or i1 [[R3]], [[R4]]
+; IS__TUNIT_OPM-NEXT:    [[O3:%.*]] = or i1 [[O1]], [[O2]]
+; IS__TUNIT_OPM-NEXT:    ret i1 [[O3]]
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@fcmp_caller
+; IS__TUNIT_NPM-SAME: (float [[FA:%.*]], float [[FB:%.*]], double [[DA:%.*]], double [[DB:%.*]], double* nofree readnone [[DPA:%.*]], double* nofree readnone [[DPB:%.*]], i8* nofree readnone [[IPA:%.*]], i8* nofree readnone [[IPB:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR1]]
+; IS__TUNIT_NPM-NEXT:    [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR1]]
+; IS__TUNIT_NPM-NEXT:    [[R3:%.*]] = call i1 @dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR1]]
+; IS__TUNIT_NPM-NEXT:    [[R4:%.*]] = call i1 @ip_icmp(i8* noalias nofree readnone [[IPA]], i8* noalias nofree readnone [[IPB]]) #[[ATTR1]]
+; IS__TUNIT_NPM-NEXT:    [[O1:%.*]] = or i1 [[R1]], [[R2]]
+; IS__TUNIT_NPM-NEXT:    [[O2:%.*]] = or i1 [[R3]], [[R4]]
+; IS__TUNIT_NPM-NEXT:    [[O3:%.*]] = or i1 [[O1]], [[O2]]
+; IS__TUNIT_NPM-NEXT:    ret i1 [[O3]]
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@fcmp_caller
-; IS__CGSCC_OPM-SAME: (float [[FA:%.*]], float [[FB:%.*]], double [[DA:%.*]], double [[DB:%.*]], double* nofree readnone [[DPA:%.*]], double* nofree readnone [[DPB:%.*]], i8* nofree readnone [[IPA:%.*]], i8* nofree readnone [[IPB:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_OPM-SAME: (float [[FA:%.*]], float [[FB:%.*]], double [[DA:%.*]], double [[DB:%.*]], double* nofree readnone [[DPA:%.*]], double* nofree readnone [[DPB:%.*]], i8* nofree readnone [[IPA:%.*]], i8* nofree readnone [[IPB:%.*]]) #[[ATTR2]] {
 ; IS__CGSCC_OPM-NEXT:    [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR5]]
 ; IS__CGSCC_OPM-NEXT:    [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR5]]
 ; IS__CGSCC_OPM-NEXT:    [[R3:%.*]] = call i1 @dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR5]]
@@ -1188,43 +1391,73 @@ define i1 @fcmp_caller(float %fa, float %fb, double %da, double %db, double* %dp
 }
 
 define i8 @ret_two() {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@ret_two
-; IS__TUNIT____-SAME: () #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    ret i8 2
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@ret_two
+; IS__TUNIT_OPM-SAME: () #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    ret i8 2
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@ret_two
-; IS__CGSCC____-SAME: () #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret i8 2
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@ret_two
+; IS__TUNIT_NPM-SAME: () #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    ret i8 2
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@ret_two
+; IS__CGSCC_OPM-SAME: () #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    ret i8 2
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@ret_two
+; IS__CGSCC_NPM-SAME: () #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    ret i8 2
 ;
   ret i8 2
 }
 define i8 @ret_undef() {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@ret_undef
-; IS__TUNIT____-SAME: () #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    ret i8 undef
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@ret_undef
+; IS__TUNIT_OPM-SAME: () #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    ret i8 undef
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@ret_undef
-; IS__CGSCC____-SAME: () #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret i8 undef
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@ret_undef
+; IS__TUNIT_NPM-SAME: () #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    ret i8 undef
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@ret_undef
+; IS__CGSCC_OPM-SAME: () #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    ret i8 undef
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@ret_undef
+; IS__CGSCC_NPM-SAME: () #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    ret i8 undef
 ;
   ret i8 undef
 }
 
 ; Verify we collapse undef to a value and return something non-undef here.
 define i8 @undef_collapse_1() {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@undef_collapse_1
-; IS__TUNIT____-SAME: () #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    ret i8 0
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@undef_collapse_1
+; IS__TUNIT_OPM-SAME: () #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    ret i8 0
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@undef_collapse_1
+; IS__TUNIT_NPM-SAME: () #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    ret i8 0
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@undef_collapse_1
-; IS__CGSCC____-SAME: () #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret i8 0
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@undef_collapse_1
+; IS__CGSCC_OPM-SAME: () #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    ret i8 0
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@undef_collapse_1
+; IS__CGSCC_NPM-SAME: () #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    ret i8 0
 ;
   %c = call i8 @ret_undef()
   %s = shl i8 %c, 2
@@ -1233,15 +1466,25 @@ define i8 @undef_collapse_1() {
 
 ; Verify we collapse undef to a value and return something non-undef here.
 define i8 @undef_collapse_2() {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@undef_collapse_2
-; IS__TUNIT____-SAME: () #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    ret i8 0
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@undef_collapse_2
+; IS__TUNIT_OPM-SAME: () #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    ret i8 0
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@undef_collapse_2
-; IS__CGSCC____-SAME: () #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret i8 0
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@undef_collapse_2
+; IS__TUNIT_NPM-SAME: () #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    ret i8 0
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@undef_collapse_2
+; IS__CGSCC_OPM-SAME: () #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    ret i8 0
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@undef_collapse_2
+; IS__CGSCC_NPM-SAME: () #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    ret i8 0
 ;
   %c = call i8 @ret_two()
   %s = shl i8 undef, %c
@@ -1250,15 +1493,25 @@ define i8 @undef_collapse_2() {
 
 define i8 @undef_collapse_caller() {
 ;
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@undef_collapse_caller
-; IS__TUNIT____-SAME: () #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    ret i8 0
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@undef_collapse_caller
+; IS__TUNIT_OPM-SAME: () #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    ret i8 0
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@undef_collapse_caller
+; IS__TUNIT_NPM-SAME: () #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    ret i8 0
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@undef_collapse_caller
-; IS__CGSCC____-SAME: () #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret i8 0
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@undef_collapse_caller
+; IS__CGSCC_OPM-SAME: () #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    ret i8 0
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@undef_collapse_caller
+; IS__CGSCC_NPM-SAME: () #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    ret i8 0
 ;
   %c1 = call i8 @undef_collapse_1()
   %c2 = call i8 @undef_collapse_2()
@@ -1267,32 +1520,54 @@ define i8 @undef_collapse_caller() {
 }
 
 define i32 @ret1or2(i1 %c) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@ret1or2
-; IS__TUNIT____-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    [[S:%.*]] = select i1 [[C]], i32 1, i32 2
-; IS__TUNIT____-NEXT:    ret i32 [[S]]
-;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@ret1or2
-; IS__CGSCC____-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    [[S:%.*]] = select i1 [[C]], i32 1, i32 2
-; IS__CGSCC____-NEXT:    ret i32 [[S]]
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@ret1or2
+; IS__TUNIT_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    [[S:%.*]] = select i1 [[C]], i32 1, i32 2
+; IS__TUNIT_OPM-NEXT:    ret i32 [[S]]
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@ret1or2
+; IS__TUNIT_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    [[S:%.*]] = select i1 [[C]], i32 1, i32 2
+; IS__TUNIT_NPM-NEXT:    ret i32 [[S]]
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@ret1or2
+; IS__CGSCC_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    [[S:%.*]] = select i1 [[C]], i32 1, i32 2
+; IS__CGSCC_OPM-NEXT:    ret i32 [[S]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@ret1or2
+; IS__CGSCC_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[S:%.*]] = select i1 [[C]], i32 1, i32 2
+; IS__CGSCC_NPM-NEXT:    ret i32 [[S]]
 ;
   %s = select i1 %c, i32 1, i32 2
   ret i32 %s
 }
 define i1 @callee_range_1(i1 %c1, i1 %c2, i1 %c3) {
 ;
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@callee_range_1
-; IS__TUNIT____-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    ret i1 true
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@callee_range_1
+; IS__TUNIT_OPM-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    ret i1 true
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_range_1
+; IS__TUNIT_NPM-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    ret i1 true
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@callee_range_1
-; IS__CGSCC____-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret i1 true
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@callee_range_1
+; IS__CGSCC_OPM-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    ret i1 true
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee_range_1
+; IS__CGSCC_NPM-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]], i1 [[C3:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    ret i1 true
 ;
   %r1 = call i32 @ret1or2(i1 %c1)
   %r2 = call i32 @ret1or2(i1 %c2)
@@ -1308,9 +1583,9 @@ define i1 @callee_range_2(i1 %c1, i1 %c2) {
 ;
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@callee_range_2
-; IS__TUNIT_OPM-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR1]] {
-; IS__TUNIT_OPM-NEXT:    [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR1]], !range [[RNG4:![0-9]+]]
-; IS__TUNIT_OPM-NEXT:    [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR1]], !range [[RNG4]]
+; IS__TUNIT_OPM-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR2]], !range [[RNG4:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR2]], !range [[RNG4]]
 ; IS__TUNIT_OPM-NEXT:    [[A:%.*]] = add i32 [[R1]], [[R2]]
 ; IS__TUNIT_OPM-NEXT:    [[I1:%.*]] = icmp sle i32 [[A]], 3
 ; IS__TUNIT_OPM-NEXT:    [[F:%.*]] = and i1 [[I1]], true
@@ -1328,7 +1603,7 @@ define i1 @callee_range_2(i1 %c1, i1 %c2) {
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@callee_range_2
-; IS__CGSCC_OPM-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_OPM-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR2]] {
 ; IS__CGSCC_OPM-NEXT:    [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR5]], !range [[RNG5:![0-9]+]]
 ; IS__CGSCC_OPM-NEXT:    [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR5]], !range [[RNG5]]
 ; IS__CGSCC_OPM-NEXT:    [[A:%.*]] = add i32 [[R1]], [[R2]]
@@ -1357,15 +1632,25 @@ define i1 @callee_range_2(i1 %c1, i1 %c2) {
 
 
 define i32 @ret100() {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@ret100
-; IS__TUNIT____-SAME: () #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    ret i32 100
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@ret100
+; IS__TUNIT_OPM-SAME: () #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    ret i32 100
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@ret100
+; IS__TUNIT_NPM-SAME: () #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    ret i32 100
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@ret100
-; IS__CGSCC____-SAME: () #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret i32 100
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@ret100
+; IS__CGSCC_OPM-SAME: () #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    ret i32 100
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@ret100
+; IS__CGSCC_NPM-SAME: () #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    ret i32 100
 ;
   ret i32 100
 }
@@ -1374,7 +1659,7 @@ define i1 @ctx_adjustment(i32 %V) {
 ;
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@ctx_adjustment
-; IS__TUNIT_OPM-SAME: (i32 [[V:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_OPM-SAME: (i32 [[V:%.*]]) #[[ATTR2]] {
 ; IS__TUNIT_OPM-NEXT:    [[C1:%.*]] = icmp sge i32 [[V]], 100
 ; IS__TUNIT_OPM-NEXT:    br i1 [[C1]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
 ; IS__TUNIT_OPM:       if.true:
@@ -1400,7 +1685,7 @@ define i1 @ctx_adjustment(i32 %V) {
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@ctx_adjustment
-; IS__CGSCC_OPM-SAME: (i32 [[V:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_OPM-SAME: (i32 [[V:%.*]]) #[[ATTR2]] {
 ; IS__CGSCC_OPM-NEXT:    [[C1:%.*]] = icmp sge i32 [[V]], 100
 ; IS__CGSCC_OPM-NEXT:    br i1 [[C1]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
 ; IS__CGSCC_OPM:       if.true:
@@ -1439,17 +1724,29 @@ end:
 
 
 define i32 @func(i1 %c) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@func
-; IS__TUNIT____-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    [[RET:%.*]] = select i1 [[C]], i32 0, i32 1
-; IS__TUNIT____-NEXT:    ret i32 [[RET]]
-;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@func
-; IS__CGSCC____-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    [[RET:%.*]] = select i1 [[C]], i32 0, i32 1
-; IS__CGSCC____-NEXT:    ret i32 [[RET]]
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@func
+; IS__TUNIT_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    [[RET:%.*]] = select i1 [[C]], i32 0, i32 1
+; IS__TUNIT_OPM-NEXT:    ret i32 [[RET]]
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@func
+; IS__TUNIT_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    [[RET:%.*]] = select i1 [[C]], i32 0, i32 1
+; IS__TUNIT_NPM-NEXT:    ret i32 [[RET]]
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@func
+; IS__CGSCC_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    [[RET:%.*]] = select i1 [[C]], i32 0, i32 1
+; IS__CGSCC_OPM-NEXT:    ret i32 [[RET]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@func
+; IS__CGSCC_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[RET:%.*]] = select i1 [[C]], i32 0, i32 1
+; IS__CGSCC_NPM-NEXT:    ret i32 [[RET]]
 ;
   %ret = select i1 %c, i32 0, i32 1
   ret i32 %ret
@@ -1458,14 +1755,14 @@ define i32 @func(i1 %c) {
 define i32 @simplify_callsite_argument(i1 %d) {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@simplify_callsite_argument
-; IS__TUNIT_OPM-SAME: (i1 [[D:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_OPM-SAME: (i1 [[D:%.*]]) #[[ATTR2]] {
 ; IS__TUNIT_OPM-NEXT:    [[C:%.*]] = select i1 [[D]], i1 true, i1 false
 ; IS__TUNIT_OPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__TUNIT_OPM:       t:
-; IS__TUNIT_OPM-NEXT:    [[RET1:%.*]] = call noundef i32 @func(i1 noundef [[C]]) #[[ATTR1]], !range [[RNG3]]
+; IS__TUNIT_OPM-NEXT:    [[RET1:%.*]] = call i32 @func(i1 noundef [[C]]) #[[ATTR2]], !range [[RNG3]]
 ; IS__TUNIT_OPM-NEXT:    ret i32 [[RET1]]
 ; IS__TUNIT_OPM:       f:
-; IS__TUNIT_OPM-NEXT:    [[RET2:%.*]] = call noundef i32 @func(i1 noundef false) #[[ATTR1]], !range [[RNG3]]
+; IS__TUNIT_OPM-NEXT:    [[RET2:%.*]] = call i32 @func(i1 noundef false) #[[ATTR2]], !range [[RNG3]]
 ; IS__TUNIT_OPM-NEXT:    ret i32 [[RET2]]
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
@@ -1474,22 +1771,22 @@ define i32 @simplify_callsite_argument(i1 %d) {
 ; IS__TUNIT_NPM-NEXT:    [[C:%.*]] = select i1 [[D]], i1 true, i1 false
 ; IS__TUNIT_NPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__TUNIT_NPM:       t:
-; IS__TUNIT_NPM-NEXT:    [[RET1:%.*]] = call noundef i32 @func(i1 noundef true) #[[ATTR1]], !range [[RNG4]]
+; IS__TUNIT_NPM-NEXT:    [[RET1:%.*]] = call i32 @func(i1 noundef true) #[[ATTR1]], !range [[RNG4]]
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[RET1]]
 ; IS__TUNIT_NPM:       f:
-; IS__TUNIT_NPM-NEXT:    [[RET2:%.*]] = call noundef i32 @func(i1 noundef false) #[[ATTR1]], !range [[RNG4]]
+; IS__TUNIT_NPM-NEXT:    [[RET2:%.*]] = call i32 @func(i1 noundef false) #[[ATTR1]], !range [[RNG4]]
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[RET2]]
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@simplify_callsite_argument
-; IS__CGSCC_OPM-SAME: (i1 [[D:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_OPM-SAME: (i1 [[D:%.*]]) #[[ATTR2]] {
 ; IS__CGSCC_OPM-NEXT:    [[C:%.*]] = select i1 [[D]], i1 true, i1 false
 ; IS__CGSCC_OPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__CGSCC_OPM:       t:
-; IS__CGSCC_OPM-NEXT:    [[RET1:%.*]] = call noundef i32 @func(i1 noundef [[C]]) #[[ATTR5]], !range [[RNG4]]
+; IS__CGSCC_OPM-NEXT:    [[RET1:%.*]] = call i32 @func(i1 noundef [[C]]) #[[ATTR5]], !range [[RNG4]]
 ; IS__CGSCC_OPM-NEXT:    ret i32 [[RET1]]
 ; IS__CGSCC_OPM:       f:
-; IS__CGSCC_OPM-NEXT:    [[RET2:%.*]] = call noundef i32 @func(i1 noundef false) #[[ATTR5]], !range [[RNG4]]
+; IS__CGSCC_OPM-NEXT:    [[RET2:%.*]] = call i32 @func(i1 noundef false) #[[ATTR5]], !range [[RNG4]]
 ; IS__CGSCC_OPM-NEXT:    ret i32 [[RET2]]
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
@@ -1498,10 +1795,10 @@ define i32 @simplify_callsite_argument(i1 %d) {
 ; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = select i1 [[D]], i1 true, i1 false
 ; IS__CGSCC_NPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__CGSCC_NPM:       t:
-; IS__CGSCC_NPM-NEXT:    [[RET1:%.*]] = call noundef i32 @func(i1 noundef true) #[[ATTR3]], !range [[RNG4]]
+; IS__CGSCC_NPM-NEXT:    [[RET1:%.*]] = call i32 @func(i1 noundef true) #[[ATTR3]], !range [[RNG4]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[RET1]]
 ; IS__CGSCC_NPM:       f:
-; IS__CGSCC_NPM-NEXT:    [[RET2:%.*]] = call noundef i32 @func(i1 noundef false) #[[ATTR3]], !range [[RNG4]]
+; IS__CGSCC_NPM-NEXT:    [[RET2:%.*]] = call i32 @func(i1 noundef false) #[[ATTR3]], !range [[RNG4]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[RET2]]
 ;
   %c = select i1 %d, i1 true, i1 false
@@ -1515,38 +1812,59 @@ f:
 }
 
 define internal i32 @less_than_65536(i32 %arg) {
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@less_than_65536
+; IS__CGSCC_OPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    [[SHRINKED:%.*]] = udiv i32 [[ARG]], 65536
+; IS__CGSCC_OPM-NEXT:    ret i32 [[SHRINKED]]
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@less_than_65536
-; IS__CGSCC____-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    [[SHRINKED:%.*]] = udiv i32 undef, 65536
-; IS__CGSCC____-NEXT:    ret i32 undef
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@less_than_65536
+; IS__CGSCC_NPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[SHRINKED:%.*]] = udiv i32 [[ARG]], 65536
+; IS__CGSCC_NPM-NEXT:    ret i32 [[SHRINKED]]
 ;
   %shrinked = udiv i32 %arg, 65536
   ret i32 %shrinked
 }
 
 define internal i1 @is_less_than_65536(i32 %arg) {
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@is_less_than_65536
-; IS__CGSCC____-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp ult i32 undef, 65536
-; IS__CGSCC____-NEXT:    ret i1 undef
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@is_less_than_65536
+; IS__CGSCC_OPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    [[CMP:%.*]] = icmp ult i32 [[ARG]], 65536
+; IS__CGSCC_OPM-NEXT:    ret i1 true
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@is_less_than_65536
+; IS__CGSCC_NPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[CMP:%.*]] = icmp ult i32 [[ARG]], 65536
+; IS__CGSCC_NPM-NEXT:    ret i1 true
 ;
   %cmp = icmp ult i32 %arg, 65536
   ret i1 %cmp
 }
 
 define i1 @check_divided_range(i32 %arg) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@check_divided_range
-; IS__TUNIT____-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    ret i1 true
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@check_divided_range
+; IS__TUNIT_OPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    ret i1 true
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@check_divided_range
-; IS__CGSCC____-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret i1 true
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@check_divided_range
+; IS__TUNIT_NPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    ret i1 true
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@check_divided_range
+; IS__CGSCC_OPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    ret i1 true
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@check_divided_range
+; IS__CGSCC_NPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    ret i1 true
 ;
   %csret1 = call i32 @less_than_65536(i32 0)
   %csret2 = call i32 @less_than_65536(i32 %arg)
@@ -1557,37 +1875,59 @@ define i1 @check_divided_range(i32 %arg) {
 }
 
 define internal i32 @cast_and_return(i1 %c) {
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@cast_and_return
+; IS__CGSCC_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    [[RET:%.*]] = zext i1 [[C]] to i32
+; IS__CGSCC_OPM-NEXT:    ret i32 [[RET]]
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@cast_and_return
-; IS__CGSCC____-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    [[RET:%.*]] = zext i1 undef to i32
-; IS__CGSCC____-NEXT:    ret i32 undef
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@cast_and_return
+; IS__CGSCC_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[RET:%.*]] = zext i1 [[C]] to i32
+; IS__CGSCC_NPM-NEXT:    ret i32 [[RET]]
 ;
   %ret = zext i1 %c to i32
   ret i32 %ret
 }
 
 define internal i1 @is_less_than_3(i32 %c) {
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@is_less_than_3
-; IS__CGSCC____-SAME: () #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret i1 undef
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@is_less_than_3
+; IS__CGSCC_OPM-SAME: (i32 [[C:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[C]], 3
+; IS__CGSCC_OPM-NEXT:    ret i1 true
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@is_less_than_3
+; IS__CGSCC_NPM-SAME: (i32 [[C:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[C]], 3
+; IS__CGSCC_NPM-NEXT:    ret i1 true
 ;
   %cmp = icmp slt i32 %c, 3
   ret i1 %cmp
 }
 
 define i1 @check_casted_range(i1 %c) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@check_casted_range
-; IS__TUNIT____-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    ret i1 true
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@check_casted_range
+; IS__TUNIT_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    ret i1 true
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@check_casted_range
+; IS__TUNIT_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    ret i1 true
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@check_casted_range
+; IS__CGSCC_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    ret i1 true
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@check_casted_range
-; IS__CGSCC____-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret i1 true
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@check_casted_range
+; IS__CGSCC_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    ret i1 true
 ;
   %csret1 = call i32 @cast_and_return(i1 true)
   %csret2 = call i32 @cast_and_return(i1 %c)
@@ -1597,34 +1937,63 @@ define i1 @check_casted_range(i1 %c) {
 }
 
 define internal i32 @less_than_100_1(i32 %c) {
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@less_than_100_1
-; IS__CGSCC____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    switch i32 [[C]], label [[OTHERWISE:%.*]] [
-; IS__CGSCC____-NEXT:    i32 0, label [[ONZERO:%.*]]
-; IS__CGSCC____-NEXT:    i32 1, label [[ONONE:%.*]]
-; IS__CGSCC____-NEXT:    i32 2, label [[ONTWO:%.*]]
-; IS__CGSCC____-NEXT:    i32 3, label [[ONTHREE:%.*]]
-; IS__CGSCC____-NEXT:    i32 4, label [[ONFOUR:%.*]]
-; IS__CGSCC____-NEXT:    i32 5, label [[ONFIVE:%.*]]
-; IS__CGSCC____-NEXT:    i32 6, label [[ONSIX:%.*]]
-; IS__CGSCC____-NEXT:    ]
-; IS__CGSCC____:       onzero:
-; IS__CGSCC____-NEXT:    ret i32 undef
-; IS__CGSCC____:       onone:
-; IS__CGSCC____-NEXT:    ret i32 undef
-; IS__CGSCC____:       ontwo:
-; IS__CGSCC____-NEXT:    ret i32 undef
-; IS__CGSCC____:       onthree:
-; IS__CGSCC____-NEXT:    ret i32 undef
-; IS__CGSCC____:       onfour:
-; IS__CGSCC____-NEXT:    ret i32 undef
-; IS__CGSCC____:       onfive:
-; IS__CGSCC____-NEXT:    ret i32 undef
-; IS__CGSCC____:       onsix:
-; IS__CGSCC____-NEXT:    ret i32 undef
-; IS__CGSCC____:       otherwise:
-; IS__CGSCC____-NEXT:    ret i32 undef
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@less_than_100_1
+; IS__CGSCC_OPM-SAME: (i32 [[C:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    switch i32 [[C]], label [[OTHERWISE:%.*]] [
+; IS__CGSCC_OPM-NEXT:    i32 0, label [[ONZERO:%.*]]
+; IS__CGSCC_OPM-NEXT:    i32 1, label [[ONONE:%.*]]
+; IS__CGSCC_OPM-NEXT:    i32 2, label [[ONTWO:%.*]]
+; IS__CGSCC_OPM-NEXT:    i32 3, label [[ONTHREE:%.*]]
+; IS__CGSCC_OPM-NEXT:    i32 4, label [[ONFOUR:%.*]]
+; IS__CGSCC_OPM-NEXT:    i32 5, label [[ONFIVE:%.*]]
+; IS__CGSCC_OPM-NEXT:    i32 6, label [[ONSIX:%.*]]
+; IS__CGSCC_OPM-NEXT:    ]
+; IS__CGSCC_OPM:       onzero:
+; IS__CGSCC_OPM-NEXT:    ret i32 undef
+; IS__CGSCC_OPM:       onone:
+; IS__CGSCC_OPM-NEXT:    ret i32 undef
+; IS__CGSCC_OPM:       ontwo:
+; IS__CGSCC_OPM-NEXT:    ret i32 undef
+; IS__CGSCC_OPM:       onthree:
+; IS__CGSCC_OPM-NEXT:    ret i32 undef
+; IS__CGSCC_OPM:       onfour:
+; IS__CGSCC_OPM-NEXT:    ret i32 undef
+; IS__CGSCC_OPM:       onfive:
+; IS__CGSCC_OPM-NEXT:    ret i32 undef
+; IS__CGSCC_OPM:       onsix:
+; IS__CGSCC_OPM-NEXT:    ret i32 undef
+; IS__CGSCC_OPM:       otherwise:
+; IS__CGSCC_OPM-NEXT:    ret i32 undef
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@less_than_100_1
+; IS__CGSCC_NPM-SAME: (i32 [[C:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    switch i32 [[C]], label [[OTHERWISE:%.*]] [
+; IS__CGSCC_NPM-NEXT:    i32 0, label [[ONZERO:%.*]]
+; IS__CGSCC_NPM-NEXT:    i32 1, label [[ONONE:%.*]]
+; IS__CGSCC_NPM-NEXT:    i32 2, label [[ONTWO:%.*]]
+; IS__CGSCC_NPM-NEXT:    i32 3, label [[ONTHREE:%.*]]
+; IS__CGSCC_NPM-NEXT:    i32 4, label [[ONFOUR:%.*]]
+; IS__CGSCC_NPM-NEXT:    i32 5, label [[ONFIVE:%.*]]
+; IS__CGSCC_NPM-NEXT:    i32 6, label [[ONSIX:%.*]]
+; IS__CGSCC_NPM-NEXT:    ]
+; IS__CGSCC_NPM:       onzero:
+; IS__CGSCC_NPM-NEXT:    ret i32 undef
+; IS__CGSCC_NPM:       onone:
+; IS__CGSCC_NPM-NEXT:    ret i32 undef
+; IS__CGSCC_NPM:       ontwo:
+; IS__CGSCC_NPM-NEXT:    ret i32 undef
+; IS__CGSCC_NPM:       onthree:
+; IS__CGSCC_NPM-NEXT:    ret i32 undef
+; IS__CGSCC_NPM:       onfour:
+; IS__CGSCC_NPM-NEXT:    ret i32 undef
+; IS__CGSCC_NPM:       onfive:
+; IS__CGSCC_NPM-NEXT:    ret i32 undef
+; IS__CGSCC_NPM:       onsix:
+; IS__CGSCC_NPM-NEXT:    ret i32 undef
+; IS__CGSCC_NPM:       otherwise:
+; IS__CGSCC_NPM-NEXT:    ret i32 undef
 ;
   switch i32 %c, label %otherwise [ i32 0, label %onzero
   i32 1, label %onone
@@ -1652,25 +2021,40 @@ otherwise:
 }
 
 define internal i1 @is_less_than_100_1(i32 %c) {
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@is_less_than_100_1
-; IS__CGSCC____-SAME: () #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret i1 undef
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@is_less_than_100_1
+; IS__CGSCC_OPM-SAME: () #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    ret i1 undef
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@is_less_than_100_1
+; IS__CGSCC_NPM-SAME: () #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    ret i1 undef
 ;
   %cmp = icmp slt i32 %c, 100
   ret i1 %cmp
 }
 
 define i1 @propagate_range1(i32 %c){
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@propagate_range1
-; IS__TUNIT____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    ret i1 true
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@propagate_range1
+; IS__TUNIT_OPM-SAME: (i32 [[C:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    ret i1 true
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@propagate_range1
-; IS__CGSCC____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret i1 true
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@propagate_range1
+; IS__TUNIT_NPM-SAME: (i32 [[C:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    ret i1 true
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@propagate_range1
+; IS__CGSCC_OPM-SAME: (i32 [[C:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    ret i1 true
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@propagate_range1
+; IS__CGSCC_NPM-SAME: (i32 [[C:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    ret i1 true
 ;
   %csret = call i32 @less_than_100_1(i32 %c)
   %true = call i1 @is_less_than_100_1(i32 %csret)
@@ -1678,35 +2062,63 @@ define i1 @propagate_range1(i32 %c){
 }
 
 define internal i32 @less_than_100_2(i32 %c) {
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@less_than_100_2
+; IS__CGSCC_OPM-SAME: (i32 [[C:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    switch i32 [[C]], label [[OTHERWISE:%.*]] [
+; IS__CGSCC_OPM-NEXT:    i32 0, label [[ONZERO:%.*]]
+; IS__CGSCC_OPM-NEXT:    i32 1, label [[ONONE:%.*]]
+; IS__CGSCC_OPM-NEXT:    i32 2, label [[ONTWO:%.*]]
+; IS__CGSCC_OPM-NEXT:    i32 3, label [[ONTHREE:%.*]]
+; IS__CGSCC_OPM-NEXT:    i32 4, label [[ONFOUR:%.*]]
+; IS__CGSCC_OPM-NEXT:    i32 5, label [[ONFIVE:%.*]]
+; IS__CGSCC_OPM-NEXT:    i32 6, label [[ONSIX:%.*]]
+; IS__CGSCC_OPM-NEXT:    ]
+; IS__CGSCC_OPM:       onzero:
+; IS__CGSCC_OPM-NEXT:    ret i32 0
+; IS__CGSCC_OPM:       onone:
+; IS__CGSCC_OPM-NEXT:    ret i32 1
+; IS__CGSCC_OPM:       ontwo:
+; IS__CGSCC_OPM-NEXT:    ret i32 2
+; IS__CGSCC_OPM:       onthree:
+; IS__CGSCC_OPM-NEXT:    ret i32 3
+; IS__CGSCC_OPM:       onfour:
+; IS__CGSCC_OPM-NEXT:    ret i32 4
+; IS__CGSCC_OPM:       onfive:
+; IS__CGSCC_OPM-NEXT:    ret i32 5
+; IS__CGSCC_OPM:       onsix:
+; IS__CGSCC_OPM-NEXT:    ret i32 6
+; IS__CGSCC_OPM:       otherwise:
+; IS__CGSCC_OPM-NEXT:    ret i32 99
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@less_than_100_2
-; IS__CGSCC____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    switch i32 [[C]], label [[OTHERWISE:%.*]] [
-; IS__CGSCC____-NEXT:    i32 0, label [[ONZERO:%.*]]
-; IS__CGSCC____-NEXT:    i32 1, label [[ONONE:%.*]]
-; IS__CGSCC____-NEXT:    i32 2, label [[ONTWO:%.*]]
-; IS__CGSCC____-NEXT:    i32 3, label [[ONTHREE:%.*]]
-; IS__CGSCC____-NEXT:    i32 4, label [[ONFOUR:%.*]]
-; IS__CGSCC____-NEXT:    i32 5, label [[ONFIVE:%.*]]
-; IS__CGSCC____-NEXT:    i32 6, label [[ONSIX:%.*]]
-; IS__CGSCC____-NEXT:    ]
-; IS__CGSCC____:       onzero:
-; IS__CGSCC____-NEXT:    ret i32 undef
-; IS__CGSCC____:       onone:
-; IS__CGSCC____-NEXT:    ret i32 undef
-; IS__CGSCC____:       ontwo:
-; IS__CGSCC____-NEXT:    ret i32 undef
-; IS__CGSCC____:       onthree:
-; IS__CGSCC____-NEXT:    ret i32 undef
-; IS__CGSCC____:       onfour:
-; IS__CGSCC____-NEXT:    ret i32 undef
-; IS__CGSCC____:       onfive:
-; IS__CGSCC____-NEXT:    ret i32 undef
-; IS__CGSCC____:       onsix:
-; IS__CGSCC____-NEXT:    ret i32 undef
-; IS__CGSCC____:       otherwise:
-; IS__CGSCC____-NEXT:    ret i32 undef
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@less_than_100_2
+; IS__CGSCC_NPM-SAME: (i32 [[C:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    switch i32 [[C]], label [[OTHERWISE:%.*]] [
+; IS__CGSCC_NPM-NEXT:    i32 0, label [[ONZERO:%.*]]
+; IS__CGSCC_NPM-NEXT:    i32 1, label [[ONONE:%.*]]
+; IS__CGSCC_NPM-NEXT:    i32 2, label [[ONTWO:%.*]]
+; IS__CGSCC_NPM-NEXT:    i32 3, label [[ONTHREE:%.*]]
+; IS__CGSCC_NPM-NEXT:    i32 4, label [[ONFOUR:%.*]]
+; IS__CGSCC_NPM-NEXT:    i32 5, label [[ONFIVE:%.*]]
+; IS__CGSCC_NPM-NEXT:    i32 6, label [[ONSIX:%.*]]
+; IS__CGSCC_NPM-NEXT:    ]
+; IS__CGSCC_NPM:       onzero:
+; IS__CGSCC_NPM-NEXT:    ret i32 0
+; IS__CGSCC_NPM:       onone:
+; IS__CGSCC_NPM-NEXT:    ret i32 1
+; IS__CGSCC_NPM:       ontwo:
+; IS__CGSCC_NPM-NEXT:    ret i32 2
+; IS__CGSCC_NPM:       onthree:
+; IS__CGSCC_NPM-NEXT:    ret i32 3
+; IS__CGSCC_NPM:       onfour:
+; IS__CGSCC_NPM-NEXT:    ret i32 4
+; IS__CGSCC_NPM:       onfive:
+; IS__CGSCC_NPM-NEXT:    ret i32 5
+; IS__CGSCC_NPM:       onsix:
+; IS__CGSCC_NPM-NEXT:    ret i32 6
+; IS__CGSCC_NPM:       otherwise:
+; IS__CGSCC_NPM-NEXT:    ret i32 99
 ;
   switch i32 %c, label %otherwise [ i32 0, label %onzero
   i32 1, label %onone
@@ -1734,27 +2146,42 @@ otherwise:
 }
 
 define internal i1 @is_less_than_100_2(i32 %c) {
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@is_less_than_100_2
+; IS__CGSCC_OPM-SAME: (i32 noundef [[C:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[C]], 100
+; IS__CGSCC_OPM-NEXT:    ret i1 true
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@is_less_than_100_2
-; IS__CGSCC____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp slt i32 undef, 100
-; IS__CGSCC____-NEXT:    ret i1 undef
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@is_less_than_100_2
+; IS__CGSCC_NPM-SAME: (i32 noundef [[C:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[CMP:%.*]] = icmp slt i32 [[C]], 100
+; IS__CGSCC_NPM-NEXT:    ret i1 true
 ;
   %cmp = icmp slt i32 %c, 100
   ret i1 %cmp
 }
 
 define i1 @propagate_range2(i32 %c) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@propagate_range2
-; IS__TUNIT____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    ret i1 true
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@propagate_range2
+; IS__TUNIT_OPM-SAME: (i32 [[C:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_OPM-NEXT:    ret i1 true
+;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@propagate_range2
+; IS__TUNIT_NPM-SAME: (i32 [[C:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    ret i1 true
+;
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@propagate_range2
+; IS__CGSCC_OPM-SAME: (i32 [[C:%.*]]) #[[ATTR2]] {
+; IS__CGSCC_OPM-NEXT:    ret i1 true
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@propagate_range2
-; IS__CGSCC____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret i1 true
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@propagate_range2
+; IS__CGSCC_NPM-SAME: (i32 [[C:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    ret i1 true
 ;
   %csret1 = call i32 @less_than_100_2(i32 0)
   %true1 = call i1 @is_less_than_100_2(i32 %csret1)
@@ -1767,20 +2194,27 @@ define i1 @propagate_range2(i32 %c) {
 define internal i1 @non_zero(i8 %v) {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@non_zero
-; IS__TUNIT_OPM-SAME: (i8 [[V:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_OPM-SAME: (i8 [[V:%.*]]) #[[ATTR2]] {
 ; IS__TUNIT_OPM-NEXT:    [[R:%.*]] = icmp ne i8 [[V]], 0
 ; IS__TUNIT_OPM-NEXT:    ret i1 [[R]]
 ;
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@non_zero
+; IS__TUNIT_NPM-SAME: (i8 [[V:%.*]]) #[[ATTR1]] {
+; IS__TUNIT_NPM-NEXT:    [[R:%.*]] = icmp ne i8 [[V]], 0
+; IS__TUNIT_NPM-NEXT:    ret i1 [[R]]
+;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@non_zero
-; IS__CGSCC_OPM-SAME: (i8 [[V:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_OPM-SAME: (i8 [[V:%.*]]) #[[ATTR2]] {
 ; IS__CGSCC_OPM-NEXT:    [[R:%.*]] = icmp ne i8 [[V]], 0
 ; IS__CGSCC_OPM-NEXT:    ret i1 [[R]]
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@non_zero
-; IS__CGSCC_NPM-SAME: () #[[ATTR1]] {
-; IS__CGSCC_NPM-NEXT:    ret i1 undef
+; IS__CGSCC_NPM-SAME: (i8 [[V:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[R:%.*]] = icmp ne i8 [[V]], 0
+; IS__CGSCC_NPM-NEXT:    ret i1 [[R]]
 ;
   %r = icmp ne i8 %v, 0
   ret i1 %r
@@ -1795,7 +2229,7 @@ define i1 @context(i8* %p) {
 ; IS__TUNIT_OPM-NEXT:    [[C:%.*]] = icmp slt i8 0, [[L]]
 ; IS__TUNIT_OPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__TUNIT_OPM:       t:
-; IS__TUNIT_OPM-NEXT:    [[R:%.*]] = call i1 @non_zero(i8 [[L]]) #[[ATTR1]]
+; IS__TUNIT_OPM-NEXT:    [[R:%.*]] = call i1 @non_zero(i8 [[L]]) #[[ATTR2]]
 ; IS__TUNIT_OPM-NEXT:    ret i1 [[R]]
 ; IS__TUNIT_OPM:       f:
 ; IS__TUNIT_OPM-NEXT:    ret i1 false
@@ -1807,7 +2241,8 @@ define i1 @context(i8* %p) {
 ; IS__TUNIT_NPM-NEXT:    [[C:%.*]] = icmp slt i8 0, [[L]]
 ; IS__TUNIT_NPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__TUNIT_NPM:       t:
-; IS__TUNIT_NPM-NEXT:    ret i1 true
+; IS__TUNIT_NPM-NEXT:    [[R:%.*]] = call i1 @non_zero(i8 [[L]]) #[[ATTR1]]
+; IS__TUNIT_NPM-NEXT:    ret i1 [[R]]
 ; IS__TUNIT_NPM:       f:
 ; IS__TUNIT_NPM-NEXT:    ret i1 false
 ;
@@ -1830,7 +2265,8 @@ define i1 @context(i8* %p) {
 ; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = icmp slt i8 0, [[L]]
 ; IS__CGSCC_NPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__CGSCC_NPM:       t:
-; IS__CGSCC_NPM-NEXT:    ret i1 true
+; IS__CGSCC_NPM-NEXT:    [[R:%.*]] = call i1 @non_zero(i8 [[L]]) #[[ATTR3]]
+; IS__CGSCC_NPM-NEXT:    ret i1 [[R]]
 ; IS__CGSCC_NPM:       f:
 ; IS__CGSCC_NPM-NEXT:    ret i1 false
 ;
@@ -1916,8 +2352,8 @@ declare void @barney(i32 signext, i32 signext)
 !1 = !{i32 10, i32 100}
 ;.
 ; IS__TUNIT_OPM: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind readonly willreturn }
-; IS__TUNIT_OPM: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
-; IS__TUNIT_OPM: attributes #[[ATTR2]] = { nofree nosync nounwind readnone }
+; IS__TUNIT_OPM: attributes #[[ATTR1]] = { nofree nosync nounwind readnone }
+; IS__TUNIT_OPM: attributes #[[ATTR2]] = { nofree nosync nounwind readnone willreturn }
 ; IS__TUNIT_OPM: attributes #[[ATTR3]] = { nofree nosync nounwind readonly willreturn }
 ;.
 ; IS__TUNIT_NPM: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind readonly willreturn }
@@ -1925,8 +2361,8 @@ declare void @barney(i32 signext, i32 signext)
 ; IS__TUNIT_NPM: attributes #[[ATTR2]] = { nofree nosync nounwind readonly willreturn }
 ;.
 ; IS__CGSCC_OPM: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn }
-; IS__CGSCC_OPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn }
-; IS__CGSCC_OPM: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone }
+; IS__CGSCC_OPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone }
+; IS__CGSCC_OPM: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn }
 ; IS__CGSCC_OPM: attributes #[[ATTR3]] = { readonly willreturn }
 ; IS__CGSCC_OPM: attributes #[[ATTR4]] = { nounwind readnone }
 ; IS__CGSCC_OPM: attributes #[[ATTR5]] = { readnone willreturn }

diff  --git a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll
index 4c078e91e708..800e844c5976 100644
--- a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll
+++ b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll
@@ -43,7 +43,7 @@ define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 ; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call i32* @internal_ret1_rrw(i32* nofree align 4 [[R0]], i32* nofree [[R0]], i32* nofree [[W0]]) #[[ATTR2]]
 ; IS__TUNIT____-NEXT:    [[CALL2:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly "no-capture-maybe-returned" [[W0]]) #[[ATTR3:[0-9]+]]
 ; IS__TUNIT____-NEXT:    [[CALL3:%.*]] = call i32* @internal_ret1_rw(i32* nofree align 4 [[R0]], i32* nofree [[W0]]) #[[ATTR2]]
-; IS__TUNIT____-NEXT:    ret i32* [[W0]]
+; IS__TUNIT____-NEXT:    ret i32* [[CALL3]]
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree nosync nounwind
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@external_ret2_nrw
@@ -53,7 +53,7 @@ define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 ; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32* @internal_ret1_rrw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree [[W0]]) #[[ATTR2]]
 ; IS__CGSCC____-NEXT:    [[CALL2:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree writeonly "no-capture-maybe-returned" [[W0]]) #[[ATTR3:[0-9]+]]
 ; IS__CGSCC____-NEXT:    [[CALL3:%.*]] = call i32* @internal_ret1_rw(i32* nofree nonnull align 4 dereferenceable(4) [[R0]], i32* nofree [[W0]]) #[[ATTR2]]
-; IS__CGSCC____-NEXT:    ret i32* [[W0]]
+; IS__CGSCC____-NEXT:    ret i32* [[CALL3]]
 ;
 entry:
   %call = call i32* @internal_ret0_nw(i32* %n0, i32* %w0)
@@ -86,12 +86,12 @@ define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) {
 ; IS__TUNIT____-NEXT:    [[CALL5:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
 ; IS__TUNIT____-NEXT:    br label [[RETURN]]
 ; IS__TUNIT____:       return:
-; IS__TUNIT____-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ [[N0]], [[IF_END]] ], [ [[N0]], [[IF_THEN]] ]
-; IS__TUNIT____-NEXT:    ret i32* [[N0]]
+; IS__TUNIT____-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ [[CALL5]], [[IF_END]] ], [ [[N0]], [[IF_THEN]] ]
+; IS__TUNIT____-NEXT:    ret i32* [[RETVAL_0]]
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree nosync nounwind
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@internal_ret0_nw
-; IS__CGSCC____-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-SAME: (i32* nofree returned [[N0:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[R0:%.*]] = alloca i32, align 4
 ; IS__CGSCC____-NEXT:    [[R1:%.*]] = alloca i32, align 4
@@ -111,8 +111,8 @@ define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) {
 ; IS__CGSCC____-NEXT:    [[CALL5:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
 ; IS__CGSCC____-NEXT:    br label [[RETURN]]
 ; IS__CGSCC____:       return:
-; IS__CGSCC____-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ [[N0]], [[IF_END]] ], [ [[N0]], [[IF_THEN]] ]
-; IS__CGSCC____-NEXT:    ret i32* undef
+; IS__CGSCC____-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ [[CALL5]], [[IF_END]] ], [ [[N0]], [[IF_THEN]] ]
+; IS__CGSCC____-NEXT:    ret i32* [[RETVAL_0]]
 ;
 entry:
   %r0 = alloca i32, align 4
@@ -143,7 +143,7 @@ return:                                           ; preds = %if.end, %if.then
 define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) {
 ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@internal_ret1_rrw
-; IS__TUNIT____-SAME: (i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree [[R1:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] {
+; IS__TUNIT____-SAME: (i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree returned [[R1:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    [[TMP0:%.*]] = load i32, i32* [[R0]], align 4
 ; IS__TUNIT____-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
@@ -166,12 +166,12 @@ define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) {
 ; IS__TUNIT____-NEXT:    [[CALL8:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
 ; IS__TUNIT____-NEXT:    br label [[RETURN]]
 ; IS__TUNIT____:       return:
-; IS__TUNIT____-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ [[R1]], [[IF_END]] ], [ [[R1]], [[IF_THEN]] ]
+; IS__TUNIT____-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ [[CALL8]], [[IF_END]] ], [ [[R1]], [[IF_THEN]] ]
 ; IS__TUNIT____-NEXT:    ret i32* undef
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree nosync nounwind
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@internal_ret1_rrw
-; IS__CGSCC____-SAME: (i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree nonnull align 4 dereferenceable(4) [[R1:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-SAME: (i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree nonnull returned align 4 dereferenceable(4) [[R1:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[TMP0:%.*]] = load i32, i32* [[R0]], align 4
 ; IS__CGSCC____-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
@@ -194,7 +194,7 @@ define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) {
 ; IS__CGSCC____-NEXT:    [[CALL8:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
 ; IS__CGSCC____-NEXT:    br label [[RETURN]]
 ; IS__CGSCC____:       return:
-; IS__CGSCC____-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ [[R1]], [[IF_END]] ], [ [[R1]], [[IF_THEN]] ]
+; IS__CGSCC____-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ [[CALL8]], [[IF_END]] ], [ [[R1]], [[IF_THEN]] ]
 ; IS__CGSCC____-NEXT:    ret i32* undef
 ;
 entry:
@@ -293,12 +293,12 @@ define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) {
 ; IS__TUNIT____-NEXT:    [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
 ; IS__TUNIT____-NEXT:    br label [[RETURN]]
 ; IS__TUNIT____:       return:
-; IS__TUNIT____-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ [[W0]], [[IF_END]] ], [ [[W0]], [[IF_THEN]] ]
-; IS__TUNIT____-NEXT:    ret i32* [[W0]]
+; IS__TUNIT____-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ [[CALL4]], [[IF_END]] ], [ [[W0]], [[IF_THEN]] ]
+; IS__TUNIT____-NEXT:    ret i32* [[RETVAL_0]]
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree nosync nounwind
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@internal_ret1_rw
-; IS__CGSCC____-SAME: (i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree [[W0:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-SAME: (i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0:%.*]], i32* nofree returned [[W0:%.*]]) #[[ATTR0]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[TMP0:%.*]] = load i32, i32* [[R0]], align 4
 ; IS__CGSCC____-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
@@ -315,8 +315,8 @@ define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) {
 ; IS__CGSCC____-NEXT:    [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
 ; IS__CGSCC____-NEXT:    br label [[RETURN]]
 ; IS__CGSCC____:       return:
-; IS__CGSCC____-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ [[W0]], [[IF_END]] ], [ [[W0]], [[IF_THEN]] ]
-; IS__CGSCC____-NEXT:    ret i32* undef
+; IS__CGSCC____-NEXT:    [[RETVAL_0:%.*]] = phi i32* [ [[CALL4]], [[IF_END]] ], [ [[W0]], [[IF_THEN]] ]
+; IS__CGSCC____-NEXT:    ret i32* [[RETVAL_0]]
 ;
 entry:
   %0 = load i32, i32* %r0, align 4
@@ -348,7 +348,7 @@ define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly "no-capture-maybe-returned" [[W0]]) #[[ATTR3]]
 ; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree [[R0]], i32* nofree [[W0]]) #[[ATTR2]]
-; IS__TUNIT____-NEXT:    ret i32* [[W0]]
+; IS__TUNIT____-NEXT:    ret i32* [[CALL1]]
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree nosync nounwind
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@external_source_ret2_nrw
@@ -356,7 +356,7 @@ define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly "no-capture-maybe-returned" [[W0]]) #[[ATTR3]]
 ; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree [[R0]], i32* nofree [[W0]]) #[[ATTR4]]
-; IS__CGSCC____-NEXT:    ret i32* [[W0]]
+; IS__CGSCC____-NEXT:    ret i32* [[CALL1]]
 ;
 entry:
   %call = call i32* @external_sink_ret2_nrw(i32* %n0, i32* %r0, i32* %w0)

diff  --git a/llvm/test/Transforms/Attributor/readattrs.ll b/llvm/test/Transforms/Attributor/readattrs.ll
index ced99bf38a42..9beec8f7c0e2 100644
--- a/llvm/test/Transforms/Attributor/readattrs.ll
+++ b/llvm/test/Transforms/Attributor/readattrs.ll
@@ -143,14 +143,16 @@ define void @test8_2(i32* %p) {
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@test8_2
 ; IS__TUNIT____-SAME: (i32* nocapture nofree writeonly [[P:%.*]]) #[[ATTR0]] {
 ; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    store i32 10, i32* [[P]], align 4
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32* @test8_1(i32* noalias nofree readnone "no-capture-maybe-returned" [[P]]) #[[ATTR1]]
+; IS__TUNIT____-NEXT:    store i32 10, i32* [[CALL]], align 4
 ; IS__TUNIT____-NEXT:    ret void
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test8_2
 ; IS__CGSCC____-SAME: (i32* nocapture nofree writeonly [[P:%.*]]) #[[ATTR3]] {
 ; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    store i32 10, i32* [[P]], align 4
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call align 4 i32* @test8_1(i32* noalias nofree readnone "no-capture-maybe-returned" [[P]]) #[[ATTR11:[0-9]+]]
+; IS__CGSCC____-NEXT:    store i32 10, i32* [[CALL]], align 4
 ; IS__CGSCC____-NEXT:    ret void
 ;
 entry:
@@ -174,7 +176,7 @@ define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) {
 ; IS__CGSCC____: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test9
 ; IS__CGSCC____-SAME: (<4 x i32*> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) #[[ATTR4:[0-9]+]] {
-; IS__CGSCC____-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>) #[[ATTR11:[0-9]+]]
+; IS__CGSCC____-NEXT:    call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>) #[[ATTR12:[0-9]+]]
 ; IS__CGSCC____-NEXT:    ret void
 ;
   call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>)
@@ -193,7 +195,7 @@ define <4 x i32> @test10(<4 x i32*> %ptrs) {
 ; IS__CGSCC____: Function Attrs: nofree nosync nounwind readonly willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test10
 ; IS__CGSCC____-SAME: (<4 x i32*> [[PTRS:%.*]]) #[[ATTR5:[0-9]+]] {
-; IS__CGSCC____-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>, <4 x i32> undef) #[[ATTR12:[0-9]+]]
+; IS__CGSCC____-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>, <4 x i32> undef) #[[ATTR13:[0-9]+]]
 ; IS__CGSCC____-NEXT:    ret <4 x i32> [[RES]]
 ;
   %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>, <4 x i32>undef)
@@ -231,7 +233,7 @@ define <4 x i32> @test12_2(<4 x i32*> %ptrs) {
 ; IS__CGSCC____: Function Attrs: argmemonly nounwind
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test12_2
 ; IS__CGSCC____-SAME: (<4 x i32*> [[PTRS:%.*]]) #[[ATTR7:[0-9]+]] {
-; IS__CGSCC____-NEXT:    [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) #[[ATTR13:[0-9]+]]
+; IS__CGSCC____-NEXT:    [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) #[[ATTR14:[0-9]+]]
 ; IS__CGSCC____-NEXT:    ret <4 x i32> [[RES]]
 ;
   %res = call <4 x i32> @test12_1(<4 x i32*> %ptrs)
@@ -430,7 +432,25 @@ define void @ptr_uses(i8* %ptr) {
 define void @ptr_use_chain(i8* %ptr) {
 ; CHECK-LABEL: define {{[^@]+}}@ptr_use_chain
 ; CHECK-SAME: (i8* [[PTR:%.*]]) {
-; CHECK-NEXT:    call void @escape_i8(i8* [[PTR]])
+; CHECK-NEXT:    [[BC0:%.*]] = bitcast i8* [[PTR]] to i32*
+; CHECK-NEXT:    [[BC1:%.*]] = bitcast i32* [[BC0]] to i8*
+; CHECK-NEXT:    [[BC2:%.*]] = bitcast i8* [[BC1]] to i32*
+; CHECK-NEXT:    [[BC3:%.*]] = bitcast i32* [[BC2]] to i8*
+; CHECK-NEXT:    [[BC4:%.*]] = bitcast i8* [[BC3]] to i32*
+; CHECK-NEXT:    [[BC5:%.*]] = bitcast i32* [[BC4]] to i8*
+; CHECK-NEXT:    [[BC6:%.*]] = bitcast i8* [[BC5]] to i32*
+; CHECK-NEXT:    [[BC7:%.*]] = bitcast i32* [[BC6]] to i8*
+; CHECK-NEXT:    [[BC8:%.*]] = bitcast i8* [[BC7]] to i32*
+; CHECK-NEXT:    [[BC9:%.*]] = bitcast i32* [[BC8]] to i8*
+; CHECK-NEXT:    [[ABC2:%.*]] = bitcast i8* [[BC9]] to i32*
+; CHECK-NEXT:    [[ABC3:%.*]] = bitcast i32* [[ABC2]] to i8*
+; CHECK-NEXT:    [[ABC4:%.*]] = bitcast i8* [[ABC3]] to i32*
+; CHECK-NEXT:    [[ABC5:%.*]] = bitcast i32* [[ABC4]] to i8*
+; CHECK-NEXT:    [[ABC6:%.*]] = bitcast i8* [[ABC5]] to i32*
+; CHECK-NEXT:    [[ABC7:%.*]] = bitcast i32* [[ABC6]] to i8*
+; CHECK-NEXT:    [[ABC8:%.*]] = bitcast i8* [[ABC7]] to i32*
+; CHECK-NEXT:    [[ABC9:%.*]] = bitcast i32* [[ABC8]] to i8*
+; CHECK-NEXT:    call void @escape_i8(i8* [[ABC9]])
 ; CHECK-NEXT:    ret void
 ;
   %bc0 = bitcast i8* %ptr to i32*
@@ -498,7 +518,8 @@ define i32 @read_only_constant_mem() {
 ; IS__CGSCC____: attributes #[[ATTR8]] = { argmemonly nofree norecurse nounwind willreturn }
 ; IS__CGSCC____: attributes #[[ATTR9]] = { readnone }
 ; IS__CGSCC____: attributes #[[ATTR10]] = { nounwind readonly }
-; IS__CGSCC____: attributes #[[ATTR11]] = { willreturn writeonly }
-; IS__CGSCC____: attributes #[[ATTR12]] = { readonly willreturn }
-; IS__CGSCC____: attributes #[[ATTR13]] = { nounwind }
+; IS__CGSCC____: attributes #[[ATTR11]] = { readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR12]] = { willreturn writeonly }
+; IS__CGSCC____: attributes #[[ATTR13]] = { readonly willreturn }
+; IS__CGSCC____: attributes #[[ATTR14]] = { nounwind }
 ;.

diff  --git a/llvm/test/Transforms/Attributor/returned.ll b/llvm/test/Transforms/Attributor/returned.ll
index 584c3444893e..293ead3d0e54 100644
--- a/llvm/test/Transforms/Attributor/returned.ll
+++ b/llvm/test/Transforms/Attributor/returned.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 ;
@@ -64,15 +64,17 @@ define i32 @scc_r1(i32 %a, i32 %r, i32 %b) #0 {
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@scc_r1
 ; IS__TUNIT____-SAME: (i32 [[A:%.*]], i32 returned [[R:%.*]], i32 [[B:%.*]]) #[[ATTR1:[0-9]+]] {
 ; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call i32 @scc_r2(i32 [[R]], i32 [[A]], i32 [[R]]) #[[ATTR6:[0-9]+]]
-; IS__TUNIT____-NEXT:    ret i32 [[R]]
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32 @sink_r0(i32 [[R]]) #[[ATTR5:[0-9]+]]
+; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call i32 @scc_r2(i32 [[R]], i32 [[A]], i32 [[CALL]]) #[[ATTR6:[0-9]+]]
+; IS__TUNIT____-NEXT:    ret i32 [[CALL1]]
 ;
-; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable willreturn
+; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@scc_r1
 ; IS__CGSCC____-SAME: (i32 [[A:%.*]], i32 returned [[R:%.*]], i32 [[B:%.*]]) #[[ATTR1:[0-9]+]] {
 ; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32 @scc_r2(i32 [[R]], i32 [[A]], i32 [[R]]) #[[ATTR7:[0-9]+]]
-; IS__CGSCC____-NEXT:    ret i32 [[R]]
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32 @sink_r0(i32 [[R]]) #[[ATTR6:[0-9]+]]
+; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32 @scc_r2(i32 [[R]], i32 [[A]], i32 [[CALL]]) #[[ATTR7:[0-9]+]]
+; IS__CGSCC____-NEXT:    ret i32 [[CALL1]]
 ;
 entry:
   %call = call i32 @sink_r0(i32 %r)
@@ -88,18 +90,20 @@ define i32 @scc_r2(i32 %a, i32 %b, i32 %r) #0 {
 ; IS__TUNIT____-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A]], [[B]]
 ; IS__TUNIT____-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; IS__TUNIT____:       if.then:
-; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call i32 @scc_r2(i32 [[B]], i32 [[A]], i32 [[R]]) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32 @sink_r0(i32 [[R]]) #[[ATTR5]]
+; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call i32 @scc_r2(i32 [[B]], i32 [[A]], i32 [[CALL]]) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    br label [[RETURN:%.*]]
 ; IS__TUNIT____:       if.end:
 ; IS__TUNIT____-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[A]], [[B]]
 ; IS__TUNIT____-NEXT:    br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END12:%.*]]
 ; IS__TUNIT____:       if.then3:
+; IS__TUNIT____-NEXT:    [[CALL4:%.*]] = call i32 @sink_r0(i32 [[B]]) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    [[CALL5:%.*]] = call i32 @scc_r1(i32 [[A]], i32 [[B]], i32 undef) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    [[CALL6:%.*]] = call i32 @scc_r2(i32 [[R]], i32 [[R]], i32 [[R]]) #[[ATTR6]]
-; IS__TUNIT____-NEXT:    [[CALL7:%.*]] = call i32 @scc_r1(i32 [[A]], i32 [[R]], i32 undef) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL7:%.*]] = call i32 @scc_r1(i32 [[A]], i32 [[CALL6]], i32 undef) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    [[CALL8:%.*]] = call i32 @scc_r2(i32 [[A]], i32 [[B]], i32 [[R]]) #[[ATTR6]]
-; IS__TUNIT____-NEXT:    [[CALL9:%.*]] = call i32 @scc_r2(i32 [[B]], i32 [[R]], i32 [[R]]) #[[ATTR6]]
-; IS__TUNIT____-NEXT:    [[CALL11:%.*]] = call i32 @scc_r1(i32 [[B]], i32 [[R]], i32 undef) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL9:%.*]] = call i32 @scc_r2(i32 [[CALL5]], i32 [[CALL7]], i32 [[CALL8]]) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL11:%.*]] = call i32 @scc_r1(i32 [[CALL4]], i32 [[CALL9]], i32 undef) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    br label [[RETURN]]
 ; IS__TUNIT____:       if.end12:
 ; IS__TUNIT____-NEXT:    [[CMP13:%.*]] = icmp eq i32 [[A]], [[B]]
@@ -110,31 +114,33 @@ define i32 @scc_r2(i32 %a, i32 %b, i32 %r) #0 {
 ; IS__TUNIT____-NEXT:    [[CALL14:%.*]] = call i32 @scc_r2(i32 [[A]], i32 [[B]], i32 [[R]]) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    br label [[COND_END]]
 ; IS__TUNIT____:       cond.end:
-; IS__TUNIT____-NEXT:    [[COND:%.*]] = phi i32 [ [[R]], [[COND_TRUE]] ], [ [[R]], [[COND_FALSE]] ]
+; IS__TUNIT____-NEXT:    [[COND:%.*]] = phi i32 [ [[R]], [[COND_TRUE]] ], [ [[CALL14]], [[COND_FALSE]] ]
 ; IS__TUNIT____-NEXT:    br label [[RETURN]]
 ; IS__TUNIT____:       return:
-; IS__TUNIT____-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ]
-; IS__TUNIT____-NEXT:    ret i32 [[R]]
+; IS__TUNIT____-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], [[IF_THEN]] ], [ [[CALL11]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ]
+; IS__TUNIT____-NEXT:    ret i32 [[RETVAL_0]]
 ;
-; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable willreturn
+; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@scc_r2
 ; IS__CGSCC____-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 returned [[R:%.*]]) #[[ATTR1]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A]], [[B]]
 ; IS__CGSCC____-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; IS__CGSCC____:       if.then:
-; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32 @scc_r2(i32 [[B]], i32 [[A]], i32 [[R]]) #[[ATTR7]]
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32 @sink_r0(i32 [[R]]) #[[ATTR6]]
+; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32 @scc_r2(i32 [[B]], i32 [[A]], i32 [[CALL]]) #[[ATTR7]]
 ; IS__CGSCC____-NEXT:    br label [[RETURN:%.*]]
 ; IS__CGSCC____:       if.end:
 ; IS__CGSCC____-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[A]], [[B]]
 ; IS__CGSCC____-NEXT:    br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END12:%.*]]
 ; IS__CGSCC____:       if.then3:
+; IS__CGSCC____-NEXT:    [[CALL4:%.*]] = call i32 @sink_r0(i32 [[B]])
 ; IS__CGSCC____-NEXT:    [[CALL5:%.*]] = call i32 @scc_r1(i32 [[A]], i32 [[B]], i32 undef) #[[ATTR7]]
 ; IS__CGSCC____-NEXT:    [[CALL6:%.*]] = call i32 @scc_r2(i32 [[R]], i32 [[R]], i32 [[R]]) #[[ATTR7]]
-; IS__CGSCC____-NEXT:    [[CALL7:%.*]] = call i32 @scc_r1(i32 [[A]], i32 [[R]], i32 undef) #[[ATTR7]]
+; IS__CGSCC____-NEXT:    [[CALL7:%.*]] = call i32 @scc_r1(i32 [[A]], i32 [[CALL6]], i32 undef) #[[ATTR7]]
 ; IS__CGSCC____-NEXT:    [[CALL8:%.*]] = call i32 @scc_r2(i32 [[A]], i32 [[B]], i32 [[R]]) #[[ATTR7]]
-; IS__CGSCC____-NEXT:    [[CALL9:%.*]] = call i32 @scc_r2(i32 [[B]], i32 [[R]], i32 [[R]]) #[[ATTR7]]
-; IS__CGSCC____-NEXT:    [[CALL11:%.*]] = call i32 @scc_r1(i32 [[B]], i32 [[R]], i32 undef) #[[ATTR7]]
+; IS__CGSCC____-NEXT:    [[CALL9:%.*]] = call i32 @scc_r2(i32 [[CALL5]], i32 [[CALL7]], i32 [[CALL8]]) #[[ATTR7]]
+; IS__CGSCC____-NEXT:    [[CALL11:%.*]] = call i32 @scc_r1(i32 [[CALL4]], i32 [[CALL9]], i32 undef) #[[ATTR7]]
 ; IS__CGSCC____-NEXT:    br label [[RETURN]]
 ; IS__CGSCC____:       if.end12:
 ; IS__CGSCC____-NEXT:    [[CMP13:%.*]] = icmp eq i32 [[A]], [[B]]
@@ -145,11 +151,11 @@ define i32 @scc_r2(i32 %a, i32 %b, i32 %r) #0 {
 ; IS__CGSCC____-NEXT:    [[CALL14:%.*]] = call i32 @scc_r2(i32 [[A]], i32 [[B]], i32 [[R]]) #[[ATTR7]]
 ; IS__CGSCC____-NEXT:    br label [[COND_END]]
 ; IS__CGSCC____:       cond.end:
-; IS__CGSCC____-NEXT:    [[COND:%.*]] = phi i32 [ [[R]], [[COND_TRUE]] ], [ [[R]], [[COND_FALSE]] ]
+; IS__CGSCC____-NEXT:    [[COND:%.*]] = phi i32 [ [[R]], [[COND_TRUE]] ], [ [[CALL14]], [[COND_FALSE]] ]
 ; IS__CGSCC____-NEXT:    br label [[RETURN]]
 ; IS__CGSCC____:       return:
-; IS__CGSCC____-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ]
-; IS__CGSCC____-NEXT:    ret i32 [[R]]
+; IS__CGSCC____-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], [[IF_THEN]] ], [ [[CALL11]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ]
+; IS__CGSCC____-NEXT:    ret i32 [[RETVAL_0]]
 ;
 entry:
   %cmp = icmp sgt i32 %a, %b
@@ -203,18 +209,20 @@ define i32 @scc_rX(i32 %a, i32 %b, i32 %r) #0 {
 ; IS__TUNIT____-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A]], [[B]]
 ; IS__TUNIT____-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; IS__TUNIT____:       if.then:
-; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call i32 @scc_r2(i32 [[B]], i32 [[A]], i32 [[R]]) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32 @sink_r0(i32 [[R]]) #[[ATTR5]]
+; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call i32 @scc_r2(i32 [[B]], i32 [[A]], i32 [[CALL]]) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    br label [[RETURN:%.*]]
 ; IS__TUNIT____:       if.end:
 ; IS__TUNIT____-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[A]], [[B]]
 ; IS__TUNIT____-NEXT:    br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END12:%.*]]
 ; IS__TUNIT____:       if.then3:
+; IS__TUNIT____-NEXT:    [[CALL4:%.*]] = call i32 @sink_r0(i32 [[B]]) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    [[CALL5:%.*]] = call i32 @scc_r1(i32 [[A]], i32 [[B]], i32 undef) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    [[CALL6:%.*]] = call i32 @scc_r2(i32 [[R]], i32 [[R]], i32 [[R]]) #[[ATTR6]]
-; IS__TUNIT____-NEXT:    [[CALL7:%.*]] = call i32 @scc_r1(i32 [[A]], i32 [[R]], i32 undef) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL7:%.*]] = call i32 @scc_r1(i32 [[A]], i32 [[CALL6]], i32 undef) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    [[CALL8:%.*]] = call i32 @scc_r1(i32 [[A]], i32 [[B]], i32 undef) #[[ATTR6]]
-; IS__TUNIT____-NEXT:    [[CALL9:%.*]] = call i32 @scc_r2(i32 [[B]], i32 [[R]], i32 [[B]]) #[[ATTR6]]
-; IS__TUNIT____-NEXT:    [[CALL11:%.*]] = call i32 @scc_r1(i32 [[B]], i32 [[B]], i32 undef) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL9:%.*]] = call i32 @scc_r2(i32 [[CALL5]], i32 [[CALL7]], i32 [[CALL8]]) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL11:%.*]] = call i32 @scc_r1(i32 [[CALL4]], i32 [[CALL9]], i32 undef) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    br label [[RETURN]]
 ; IS__TUNIT____:       if.end12:
 ; IS__TUNIT____-NEXT:    [[CMP13:%.*]] = icmp eq i32 [[A]], [[B]]
@@ -225,24 +233,33 @@ define i32 @scc_rX(i32 %a, i32 %b, i32 %r) #0 {
 ; IS__TUNIT____-NEXT:    [[CALL14:%.*]] = call i32 @scc_r2(i32 [[A]], i32 [[B]], i32 [[R]]) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    br label [[COND_END]]
 ; IS__TUNIT____:       cond.end:
-; IS__TUNIT____-NEXT:    [[COND:%.*]] = phi i32 [ [[R]], [[COND_TRUE]] ], [ [[R]], [[COND_FALSE]] ]
+; IS__TUNIT____-NEXT:    [[COND:%.*]] = phi i32 [ [[R]], [[COND_TRUE]] ], [ [[CALL14]], [[COND_FALSE]] ]
 ; IS__TUNIT____-NEXT:    br label [[RETURN]]
 ; IS__TUNIT____:       return:
-; IS__TUNIT____-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[R]], [[IF_THEN]] ], [ [[B]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ]
+; IS__TUNIT____-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], [[IF_THEN]] ], [ [[CALL11]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ]
 ; IS__TUNIT____-NEXT:    ret i32 [[RETVAL_0]]
 ;
-; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable willreturn
+; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@scc_rX
 ; IS__CGSCC____-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[R:%.*]]) #[[ATTR1]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A]], [[B]]
 ; IS__CGSCC____-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; IS__CGSCC____:       if.then:
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32 @sink_r0(i32 [[R]]) #[[ATTR6]]
+; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32 @scc_r2(i32 [[B]], i32 [[A]], i32 [[CALL]]) #[[ATTR8:[0-9]+]]
 ; IS__CGSCC____-NEXT:    br label [[RETURN:%.*]]
 ; IS__CGSCC____:       if.end:
 ; IS__CGSCC____-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[A]], [[B]]
 ; IS__CGSCC____-NEXT:    br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END12:%.*]]
 ; IS__CGSCC____:       if.then3:
+; IS__CGSCC____-NEXT:    [[CALL4:%.*]] = call i32 @sink_r0(i32 [[B]])
+; IS__CGSCC____-NEXT:    [[CALL5:%.*]] = call i32 @scc_r1(i32 [[A]], i32 [[B]], i32 undef) #[[ATTR8]]
+; IS__CGSCC____-NEXT:    [[CALL6:%.*]] = call i32 @scc_r2(i32 [[R]], i32 [[R]], i32 [[R]]) #[[ATTR8]]
+; IS__CGSCC____-NEXT:    [[CALL7:%.*]] = call i32 @scc_r1(i32 [[A]], i32 [[CALL6]], i32 undef) #[[ATTR8]]
+; IS__CGSCC____-NEXT:    [[CALL8:%.*]] = call i32 @scc_r1(i32 [[A]], i32 [[B]], i32 undef) #[[ATTR8]]
+; IS__CGSCC____-NEXT:    [[CALL9:%.*]] = call i32 @scc_r2(i32 [[CALL5]], i32 [[CALL7]], i32 [[CALL8]]) #[[ATTR8]]
+; IS__CGSCC____-NEXT:    [[CALL11:%.*]] = call i32 @scc_r1(i32 [[CALL4]], i32 [[CALL9]], i32 undef) #[[ATTR8]]
 ; IS__CGSCC____-NEXT:    br label [[RETURN]]
 ; IS__CGSCC____:       if.end12:
 ; IS__CGSCC____-NEXT:    [[CMP13:%.*]] = icmp eq i32 [[A]], [[B]]
@@ -250,12 +267,13 @@ define i32 @scc_rX(i32 %a, i32 %b, i32 %r) #0 {
 ; IS__CGSCC____:       cond.true:
 ; IS__CGSCC____-NEXT:    br label [[COND_END:%.*]]
 ; IS__CGSCC____:       cond.false:
+; IS__CGSCC____-NEXT:    [[CALL14:%.*]] = call i32 @scc_r2(i32 [[A]], i32 [[B]], i32 [[R]]) #[[ATTR8]]
 ; IS__CGSCC____-NEXT:    br label [[COND_END]]
 ; IS__CGSCC____:       cond.end:
-; IS__CGSCC____-NEXT:    [[COND:%.*]] = phi i32 [ [[R]], [[COND_TRUE]] ], [ [[R]], [[COND_FALSE]] ]
+; IS__CGSCC____-NEXT:    [[COND:%.*]] = phi i32 [ [[R]], [[COND_TRUE]] ], [ [[CALL14]], [[COND_FALSE]] ]
 ; IS__CGSCC____-NEXT:    br label [[RETURN]]
 ; IS__CGSCC____:       return:
-; IS__CGSCC____-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[R]], [[IF_THEN]] ], [ [[B]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ]
+; IS__CGSCC____-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[CALL1]], [[IF_THEN]] ], [ [[CALL11]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ]
 ; IS__CGSCC____-NEXT:    ret i32 [[RETVAL_0]]
 ;
 entry:
@@ -343,17 +361,19 @@ entry:
 define double* @ptr_scc_r1(double* %a, double* %r, double* %b) #0 {
 ; IS__TUNIT____: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@ptr_scc_r1
-; IS__TUNIT____-SAME: (double* nocapture nofree readnone [[A:%.*]], double* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]], double* nocapture nofree readnone [[B:%.*]]) #[[ATTR1]] {
+; IS__TUNIT____-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone returned [[R:%.*]], double* nocapture nofree readnone [[B:%.*]]) #[[ATTR1]] {
 ; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nocapture nofree readnone [[R]], double* noalias nocapture nofree readnone [[A]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR6]]
-; IS__TUNIT____-NEXT:    ret double* [[R]]
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR5]]
+; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL]]) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    ret double* [[CALL1]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@ptr_scc_r1
-; IS__CGSCC____-SAME: (double* nocapture nofree readnone [[A:%.*]], double* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]], double* nocapture nofree readnone [[B:%.*]]) #[[ATTR2:[0-9]+]] {
+; IS__CGSCC____-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone returned [[R:%.*]], double* nocapture nofree readnone [[B:%.*]]) #[[ATTR1]] {
 ; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nocapture nofree readnone [[R]], double* noalias nocapture nofree readnone [[A]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR7]]
-; IS__CGSCC____-NEXT:    ret double* [[R]]
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR6]]
+; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL]]) #[[ATTR7]]
+; IS__CGSCC____-NEXT:    ret double* [[CALL1]]
 ;
 entry:
   %call = call double* @ptr_sink_r0(double* %r)
@@ -364,23 +384,25 @@ entry:
 define double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 {
 ; IS__TUNIT____: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@ptr_scc_r2
-; IS__TUNIT____-SAME: (double* nocapture nofree readnone [[A:%.*]], double* nocapture nofree readnone [[B:%.*]], double* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]]) #[[ATTR1]] {
+; IS__TUNIT____-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone [[B:%.*]], double* nofree readnone returned [[R:%.*]]) #[[ATTR1]] {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    [[CMP:%.*]] = icmp ugt double* [[A]], [[B]]
 ; IS__TUNIT____-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; IS__TUNIT____:       if.then:
-; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nocapture nofree readnone [[B]], double* noalias nocapture nofree readnone [[A]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR5]]
+; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[B]], double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL]]) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    br label [[RETURN:%.*]]
 ; IS__TUNIT____:       if.end:
 ; IS__TUNIT____-NEXT:    [[CMP2:%.*]] = icmp ult double* [[A]], [[B]]
 ; IS__TUNIT____-NEXT:    br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END12:%.*]]
 ; IS__TUNIT____:       if.then3:
-; IS__TUNIT____-NEXT:    [[CALL5:%.*]] = call double* @ptr_scc_r1(double* noalias nocapture nofree readnone [[A]], double* noalias nofree readnone "no-capture-maybe-returned" [[B]], double* noalias nocapture nofree readnone undef) #[[ATTR6]]
-; IS__TUNIT____-NEXT:    [[CALL6:%.*]] = call double* @ptr_scc_r2(double* noalias nocapture nofree readnone [[R]], double* noalias nocapture nofree readnone [[R]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR6]]
-; IS__TUNIT____-NEXT:    [[CALL7:%.*]] = call double* @ptr_scc_r1(double* noalias nocapture nofree readnone [[A]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]], double* noalias nocapture nofree readnone undef) #[[ATTR6]]
-; IS__TUNIT____-NEXT:    [[CALL8:%.*]] = call double* @ptr_scc_r2(double* noalias nocapture nofree readnone [[A]], double* noalias nocapture nofree readnone [[B]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR6]]
-; IS__TUNIT____-NEXT:    [[CALL9:%.*]] = call double* @ptr_scc_r2(double* noalias nocapture nofree readnone [[B]], double* noalias nocapture nofree readnone [[R]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR6]]
-; IS__TUNIT____-NEXT:    [[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nocapture nofree readnone [[B]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]], double* noalias nocapture nofree readnone undef) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL4:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[B]]) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL5:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nocapture nofree readnone undef) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL6:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]]) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL7:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL6]], double* noalias nocapture nofree readnone undef) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL8:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nofree readnone [[R]]) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL9:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[CALL5]], double* noalias nofree readnone [[CALL7]], double* noalias nofree readnone [[CALL8]]) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[CALL4]], double* noalias nofree readnone [[CALL9]], double* noalias nocapture nofree readnone undef) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    br label [[RETURN]]
 ; IS__TUNIT____:       if.end12:
 ; IS__TUNIT____-NEXT:    [[CMP13:%.*]] = icmp eq double* [[A]], [[B]]
@@ -388,34 +410,36 @@ define double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 {
 ; IS__TUNIT____:       cond.true:
 ; IS__TUNIT____-NEXT:    br label [[COND_END:%.*]]
 ; IS__TUNIT____:       cond.false:
-; IS__TUNIT____-NEXT:    [[CALL14:%.*]] = call double* @ptr_scc_r2(double* noalias nocapture nofree readnone [[A]], double* noalias nocapture nofree readnone [[B]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL14:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nofree readnone [[R]]) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    br label [[COND_END]]
 ; IS__TUNIT____:       cond.end:
-; IS__TUNIT____-NEXT:    [[COND:%.*]] = phi double* [ [[R]], [[COND_TRUE]] ], [ [[R]], [[COND_FALSE]] ]
+; IS__TUNIT____-NEXT:    [[COND:%.*]] = phi double* [ [[R]], [[COND_TRUE]] ], [ [[CALL14]], [[COND_FALSE]] ]
 ; IS__TUNIT____-NEXT:    br label [[RETURN]]
 ; IS__TUNIT____:       return:
-; IS__TUNIT____-NEXT:    [[RETVAL_0:%.*]] = phi double* [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ]
-; IS__TUNIT____-NEXT:    ret double* [[R]]
+; IS__TUNIT____-NEXT:    [[RETVAL_0:%.*]] = phi double* [ [[CALL1]], [[IF_THEN]] ], [ [[CALL11]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ]
+; IS__TUNIT____-NEXT:    ret double* [[RETVAL_0]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@ptr_scc_r2
-; IS__CGSCC____-SAME: (double* nocapture nofree readnone [[A:%.*]], double* nocapture nofree readnone [[B:%.*]], double* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]]) #[[ATTR2]] {
+; IS__CGSCC____-SAME: (double* nofree readnone [[A:%.*]], double* nofree readnone [[B:%.*]], double* nofree readnone returned [[R:%.*]]) #[[ATTR1]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp ugt double* [[A]], [[B]]
 ; IS__CGSCC____-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; IS__CGSCC____:       if.then:
-; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nocapture nofree readnone [[B]], double* noalias nocapture nofree readnone [[A]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR7]]
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR6]]
+; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[B]], double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL]]) #[[ATTR7]]
 ; IS__CGSCC____-NEXT:    br label [[RETURN:%.*]]
 ; IS__CGSCC____:       if.end:
 ; IS__CGSCC____-NEXT:    [[CMP2:%.*]] = icmp ult double* [[A]], [[B]]
 ; IS__CGSCC____-NEXT:    br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END12:%.*]]
 ; IS__CGSCC____:       if.then3:
-; IS__CGSCC____-NEXT:    [[CALL5:%.*]] = call double* @ptr_scc_r1(double* noalias nocapture nofree readnone [[A]], double* noalias nofree readnone "no-capture-maybe-returned" [[B]], double* noalias nocapture nofree readnone undef) #[[ATTR7]]
-; IS__CGSCC____-NEXT:    [[CALL6:%.*]] = call double* @ptr_scc_r2(double* noalias nocapture nofree readnone [[R]], double* noalias nocapture nofree readnone [[R]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR7]]
-; IS__CGSCC____-NEXT:    [[CALL7:%.*]] = call double* @ptr_scc_r1(double* noalias nocapture nofree readnone [[A]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]], double* noalias nocapture nofree readnone undef) #[[ATTR7]]
-; IS__CGSCC____-NEXT:    [[CALL8:%.*]] = call double* @ptr_scc_r2(double* noalias nocapture nofree readnone [[A]], double* noalias nocapture nofree readnone [[B]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR7]]
-; IS__CGSCC____-NEXT:    [[CALL9:%.*]] = call double* @ptr_scc_r2(double* noalias nocapture nofree readnone [[B]], double* noalias nocapture nofree readnone [[R]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR7]]
-; IS__CGSCC____-NEXT:    [[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nocapture nofree readnone [[B]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]], double* noalias nocapture nofree readnone undef) #[[ATTR7]]
+; IS__CGSCC____-NEXT:    [[CALL4:%.*]] = call double* @ptr_sink_r0(double* noalias nofree readnone "no-capture-maybe-returned" [[B]])
+; IS__CGSCC____-NEXT:    [[CALL5:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nocapture nofree readnone undef) #[[ATTR7]]
+; IS__CGSCC____-NEXT:    [[CALL6:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]], double* noalias nofree readnone [[R]]) #[[ATTR7]]
+; IS__CGSCC____-NEXT:    [[CALL7:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[CALL6]], double* noalias nocapture nofree readnone undef) #[[ATTR7]]
+; IS__CGSCC____-NEXT:    [[CALL8:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nofree readnone [[R]]) #[[ATTR7]]
+; IS__CGSCC____-NEXT:    [[CALL9:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[CALL5]], double* noalias nofree readnone [[CALL7]], double* noalias nofree readnone [[CALL8]]) #[[ATTR7]]
+; IS__CGSCC____-NEXT:    [[CALL11:%.*]] = call double* @ptr_scc_r1(double* noalias nofree readnone [[CALL4]], double* noalias nofree readnone [[CALL9]], double* noalias nocapture nofree readnone undef) #[[ATTR7]]
 ; IS__CGSCC____-NEXT:    br label [[RETURN]]
 ; IS__CGSCC____:       if.end12:
 ; IS__CGSCC____-NEXT:    [[CMP13:%.*]] = icmp eq double* [[A]], [[B]]
@@ -423,14 +447,14 @@ define double* @ptr_scc_r2(double* %a, double* %b, double* %r) #0 {
 ; IS__CGSCC____:       cond.true:
 ; IS__CGSCC____-NEXT:    br label [[COND_END:%.*]]
 ; IS__CGSCC____:       cond.false:
-; IS__CGSCC____-NEXT:    [[CALL14:%.*]] = call double* @ptr_scc_r2(double* noalias nocapture nofree readnone [[A]], double* noalias nocapture nofree readnone [[B]], double* noalias nofree readnone "no-capture-maybe-returned" [[R]]) #[[ATTR7]]
+; IS__CGSCC____-NEXT:    [[CALL14:%.*]] = call double* @ptr_scc_r2(double* noalias nofree readnone [[A]], double* noalias nofree readnone [[B]], double* noalias nofree readnone [[R]]) #[[ATTR7]]
 ; IS__CGSCC____-NEXT:    br label [[COND_END]]
 ; IS__CGSCC____:       cond.end:
-; IS__CGSCC____-NEXT:    [[COND:%.*]] = phi double* [ [[R]], [[COND_TRUE]] ], [ [[R]], [[COND_FALSE]] ]
+; IS__CGSCC____-NEXT:    [[COND:%.*]] = phi double* [ [[R]], [[COND_TRUE]] ], [ [[CALL14]], [[COND_FALSE]] ]
 ; IS__CGSCC____-NEXT:    br label [[RETURN]]
 ; IS__CGSCC____:       return:
-; IS__CGSCC____-NEXT:    [[RETVAL_0:%.*]] = phi double* [ [[R]], [[IF_THEN]] ], [ [[R]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ]
-; IS__CGSCC____-NEXT:    ret double* [[R]]
+; IS__CGSCC____-NEXT:    [[RETVAL_0:%.*]] = phi double* [ [[CALL1]], [[IF_THEN]] ], [ [[CALL11]], [[IF_THEN3]] ], [ [[COND]], [[COND_END]] ]
+; IS__CGSCC____-NEXT:    ret double* [[RETVAL_0]]
 ;
 entry:
   %cmp = icmp ugt double* %a, %b
@@ -492,7 +516,7 @@ define i32* @rt0(i32* %a) #0 {
 ;
 ; IS__CGSCC____: Function Attrs: nofree noinline norecurse noreturn nosync nounwind readnone uwtable willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@rt0
-; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] {
+; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    unreachable
 ;
@@ -519,7 +543,7 @@ define i32* @rt1(i32* %a) #0 {
 ;
 ; IS__CGSCC____: Function Attrs: nofree noinline norecurse noreturn nosync nounwind readnone uwtable willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@rt1
-; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] {
+; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readnone align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    unreachable
 ;
@@ -539,14 +563,14 @@ define i32* @rt2_helper(i32* %a) #0 {
 ; IS__TUNIT____-SAME: (i32* nofree readnone returned [[A:%.*]]) #[[ATTR1]] {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32* @rt2(i32* noalias nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[A]]) #[[ATTR6]]
-; IS__TUNIT____-NEXT:    ret i32* [[A]]
+; IS__TUNIT____-NEXT:    ret i32* [[CALL]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@rt2_helper
-; IS__CGSCC____-SAME: (i32* nofree readnone returned [[A:%.*]]) #[[ATTR2]] {
+; IS__CGSCC____-SAME: (i32* nofree readnone returned [[A:%.*]]) #[[ATTR1]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @rt2(i32* noalias nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[A]]) #[[ATTR7]]
-; IS__CGSCC____-NEXT:    ret i32* [[A]]
+; IS__CGSCC____-NEXT:    ret i32* [[CALL]]
 ;
 entry:
   %call = call i32* @rt2(i32* %a, i32* %a)
@@ -564,12 +588,12 @@ define i32* @rt2(i32* %a, i32 *%b) #0 {
 ; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32* @rt2_helper(i32* noalias nofree readnone [[A]]) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    br label [[IF_END]]
 ; IS__TUNIT____:       if.end:
-; IS__TUNIT____-NEXT:    [[SEL:%.*]] = phi i32* [ [[B]], [[ENTRY:%.*]] ], [ [[A]], [[IF_THEN]] ]
+; IS__TUNIT____-NEXT:    [[SEL:%.*]] = phi i32* [ [[B]], [[ENTRY:%.*]] ], [ [[CALL]], [[IF_THEN]] ]
 ; IS__TUNIT____-NEXT:    ret i32* [[SEL]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@rt2
-; IS__CGSCC____-SAME: (i32* nofree readnone [[A:%.*]], i32* nofree readnone "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR2]] {
+; IS__CGSCC____-SAME: (i32* nofree readnone [[A:%.*]], i32* nofree readnone "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp eq i32* [[A]], null
 ; IS__CGSCC____-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
@@ -577,7 +601,7 @@ define i32* @rt2(i32* %a, i32 *%b) #0 {
 ; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @rt2_helper(i32* noalias nofree readnone [[A]]) #[[ATTR7]]
 ; IS__CGSCC____-NEXT:    br label [[IF_END]]
 ; IS__CGSCC____:       if.end:
-; IS__CGSCC____-NEXT:    [[SEL:%.*]] = phi i32* [ [[B]], [[ENTRY:%.*]] ], [ [[A]], [[IF_THEN]] ]
+; IS__CGSCC____-NEXT:    [[SEL:%.*]] = phi i32* [ [[B]], [[ENTRY:%.*]] ], [ [[CALL]], [[IF_THEN]] ]
 ; IS__CGSCC____-NEXT:    ret i32* [[SEL]]
 ;
 entry:
@@ -598,17 +622,17 @@ if.end:
 define i32* @rt3_helper(i32* %a, i32* %b) #0 {
 ; IS__TUNIT____: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@rt3_helper
-; IS__TUNIT____-SAME: (i32* nocapture nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] {
+; IS__TUNIT____-SAME: (i32* nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] {
 ; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32* @rt3(i32* noalias nocapture nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[B]]) #[[ATTR6]]
-; IS__TUNIT____-NEXT:    ret i32* [[B]]
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32* @rt3(i32* noalias nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[B]]) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    ret i32* [[CALL]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@rt3_helper
-; IS__CGSCC____-SAME: (i32* nocapture nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR2]] {
+; IS__CGSCC____-SAME: (i32* nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] {
 ; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @rt3(i32* noalias nocapture nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[B]]) #[[ATTR7]]
-; IS__CGSCC____-NEXT:    ret i32* [[B]]
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @rt3(i32* noalias nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[B]]) #[[ATTR7]]
+; IS__CGSCC____-NEXT:    ret i32* [[CALL]]
 ;
 entry:
   %call = call i32* @rt3(i32* %a, i32* %b)
@@ -618,29 +642,29 @@ entry:
 define i32* @rt3(i32* %a, i32 *%b) #0 {
 ; IS__TUNIT____: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@rt3
-; IS__TUNIT____-SAME: (i32* nocapture nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] {
+; IS__TUNIT____-SAME: (i32* nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    [[CMP:%.*]] = icmp eq i32* [[A]], null
 ; IS__TUNIT____-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; IS__TUNIT____:       if.then:
-; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32* @rt3_helper(i32* noalias nocapture nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[B]]) #[[ATTR6]]
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32* @rt3_helper(i32* noalias nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[B]]) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    br label [[IF_END]]
 ; IS__TUNIT____:       if.end:
-; IS__TUNIT____-NEXT:    [[SEL:%.*]] = phi i32* [ [[B]], [[ENTRY:%.*]] ], [ [[B]], [[IF_THEN]] ]
-; IS__TUNIT____-NEXT:    ret i32* [[B]]
+; IS__TUNIT____-NEXT:    [[SEL:%.*]] = phi i32* [ [[B]], [[ENTRY:%.*]] ], [ [[CALL]], [[IF_THEN]] ]
+; IS__TUNIT____-NEXT:    ret i32* [[SEL]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@rt3
-; IS__CGSCC____-SAME: (i32* nocapture nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR2]] {
+; IS__CGSCC____-SAME: (i32* nofree readnone [[A:%.*]], i32* nofree readnone returned "no-capture-maybe-returned" [[B:%.*]]) #[[ATTR1]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp eq i32* [[A]], null
 ; IS__CGSCC____-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; IS__CGSCC____:       if.then:
-; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @rt3_helper(i32* noalias nocapture nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[B]]) #[[ATTR7]]
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @rt3_helper(i32* noalias nofree readnone [[A]], i32* noalias nofree readnone "no-capture-maybe-returned" [[B]]) #[[ATTR7]]
 ; IS__CGSCC____-NEXT:    br label [[IF_END]]
 ; IS__CGSCC____:       if.end:
-; IS__CGSCC____-NEXT:    [[SEL:%.*]] = phi i32* [ [[B]], [[ENTRY:%.*]] ], [ [[B]], [[IF_THEN]] ]
-; IS__CGSCC____-NEXT:    ret i32* [[B]]
+; IS__CGSCC____-NEXT:    [[SEL:%.*]] = phi i32* [ [[B]], [[ENTRY:%.*]] ], [ [[CALL]], [[IF_THEN]] ]
+; IS__CGSCC____-NEXT:    ret i32* [[SEL]]
 ;
 entry:
   %cmp = icmp eq i32* %a, null
@@ -675,8 +699,8 @@ define i32* @calls_unknown_fn(i32* %r) #0 {
 ;
 ; IS__CGSCC____: Function Attrs: noinline nounwind uwtable
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@calls_unknown_fn
-; IS__CGSCC____-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]]) #[[ATTR4:[0-9]+]] {
-; IS__CGSCC____-NEXT:    tail call void @unknown_fn(i32* (i32*)* noundef nonnull @calls_unknown_fn) #[[ATTR8:[0-9]+]]
+; IS__CGSCC____-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[R:%.*]]) #[[ATTR3:[0-9]+]] {
+; IS__CGSCC____-NEXT:    tail call void @unknown_fn(i32* (i32*)* noundef nonnull @calls_unknown_fn) #[[ATTR9:[0-9]+]]
 ; IS__CGSCC____-NEXT:    ret i32* [[R]]
 ;
   tail call void @unknown_fn(i32* (i32*)* nonnull @calls_unknown_fn)
@@ -698,17 +722,11 @@ define i32* @calls_unknown_fn(i32* %r) #0 {
 ; Verify the maybe-redefined function is not annotated:
 ;
 define linkonce_odr i32* @maybe_redefined_fn(i32* %r) #0 {
-; IS__TUNIT____: Function Attrs: noinline nounwind uwtable
-; IS__TUNIT____-LABEL: define {{[^@]+}}@maybe_redefined_fn
-; IS__TUNIT____-SAME: (i32* [[R:%.*]]) #[[ATTR3]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    ret i32* [[R]]
-;
-; IS__CGSCC____: Function Attrs: noinline nounwind uwtable
-; IS__CGSCC____-LABEL: define {{[^@]+}}@maybe_redefined_fn
-; IS__CGSCC____-SAME: (i32* [[R:%.*]]) #[[ATTR4]] {
-; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    ret i32* [[R]]
+; CHECK: Function Attrs: noinline nounwind uwtable
+; CHECK-LABEL: define {{[^@]+}}@maybe_redefined_fn
+; CHECK-SAME: (i32* [[R:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32* [[R]]
 ;
 entry:
   ret i32* %r
@@ -724,9 +742,9 @@ define i32* @calls_maybe_redefined_fn(i32* %r) #0 {
 ;
 ; IS__CGSCC____: Function Attrs: noinline nounwind uwtable
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@calls_maybe_redefined_fn
-; IS__CGSCC____-SAME: (i32* returned [[R:%.*]]) #[[ATTR4]] {
+; IS__CGSCC____-SAME: (i32* returned [[R:%.*]]) #[[ATTR3]] {
 ; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @maybe_redefined_fn(i32* [[R]]) #[[ATTR8]]
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @maybe_redefined_fn(i32* [[R]]) #[[ATTR9]]
 ; IS__CGSCC____-NEXT:    ret i32* [[R]]
 ;
 entry:
@@ -747,17 +765,11 @@ entry:
 ; Verify the maybe-redefined function is not annotated:
 ;
 define linkonce_odr i32* @maybe_redefined_fn2(i32* %r) #0 {
-; IS__TUNIT____: Function Attrs: noinline nounwind uwtable
-; IS__TUNIT____-LABEL: define {{[^@]+}}@maybe_redefined_fn2
-; IS__TUNIT____-SAME: (i32* [[R:%.*]]) #[[ATTR3]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    ret i32* [[R]]
-;
-; IS__CGSCC____: Function Attrs: noinline nounwind uwtable
-; IS__CGSCC____-LABEL: define {{[^@]+}}@maybe_redefined_fn2
-; IS__CGSCC____-SAME: (i32* [[R:%.*]]) #[[ATTR4]] {
-; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    ret i32* [[R]]
+; CHECK: Function Attrs: noinline nounwind uwtable
+; CHECK-LABEL: define {{[^@]+}}@maybe_redefined_fn2
+; CHECK-SAME: (i32* [[R:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    ret i32* [[R]]
 ;
 entry:
   ret i32* %r
@@ -773,9 +785,9 @@ define i32* @calls_maybe_redefined_fn2(i32* %r) #0 {
 ;
 ; IS__CGSCC____: Function Attrs: noinline nounwind uwtable
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@calls_maybe_redefined_fn2
-; IS__CGSCC____-SAME: (i32* [[R:%.*]]) #[[ATTR4]] {
+; IS__CGSCC____-SAME: (i32* [[R:%.*]]) #[[ATTR3]] {
 ; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @maybe_redefined_fn2(i32* [[R]]) #[[ATTR8]]
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @maybe_redefined_fn2(i32* [[R]]) #[[ATTR9]]
 ; IS__CGSCC____-NEXT:    ret i32* [[CALL]]
 ;
 entry:
@@ -803,7 +815,10 @@ define double @select_and_phi(double %b) #0 {
 ; IS__TUNIT____:       if.then:
 ; IS__TUNIT____-NEXT:    br label [[IF_END]]
 ; IS__TUNIT____:       if.end:
-; IS__TUNIT____-NEXT:    ret double [[B]]
+; IS__TUNIT____-NEXT:    [[PHI:%.*]] = phi double [ [[B]], [[IF_THEN]] ], [ [[B]], [[ENTRY:%.*]] ]
+; IS__TUNIT____-NEXT:    [[CMP1:%.*]] = fcmp oeq double [[B]], 0.000000e+00
+; IS__TUNIT____-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], double [[B]], double [[PHI]]
+; IS__TUNIT____-NEXT:    ret double [[SEL]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree noinline norecurse nosync nounwind readnone uwtable willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@select_and_phi
@@ -814,7 +829,10 @@ define double @select_and_phi(double %b) #0 {
 ; IS__CGSCC____:       if.then:
 ; IS__CGSCC____-NEXT:    br label [[IF_END]]
 ; IS__CGSCC____:       if.end:
-; IS__CGSCC____-NEXT:    ret double [[B]]
+; IS__CGSCC____-NEXT:    [[PHI:%.*]] = phi double [ [[B]], [[IF_THEN]] ], [ [[B]], [[ENTRY:%.*]] ]
+; IS__CGSCC____-NEXT:    [[CMP1:%.*]] = fcmp oeq double [[B]], 0.000000e+00
+; IS__CGSCC____-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], double [[B]], double [[PHI]]
+; IS__CGSCC____-NEXT:    ret double [[SEL]]
 ;
 entry:
   %cmp = fcmp ogt double %b, 0.000000e+00
@@ -852,11 +870,14 @@ define double @recursion_select_and_phi(i32 %a, double %b) #0 {
 ; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call double @recursion_select_and_phi(i32 [[DEC]], double [[B]]) #[[ATTR6]]
 ; IS__TUNIT____-NEXT:    br label [[IF_END]]
 ; IS__TUNIT____:       if.end:
-; IS__TUNIT____-NEXT:    ret double [[B]]
+; IS__TUNIT____-NEXT:    [[PHI:%.*]] = phi double [ [[CALL]], [[IF_THEN]] ], [ [[B]], [[ENTRY:%.*]] ]
+; IS__TUNIT____-NEXT:    [[CMP1:%.*]] = fcmp oeq double [[B]], 0.000000e+00
+; IS__TUNIT____-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], double [[B]], double [[PHI]]
+; IS__TUNIT____-NEXT:    ret double [[SEL]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree noinline nosync nounwind readnone uwtable
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@recursion_select_and_phi
-; IS__CGSCC____-SAME: (i32 [[A:%.*]], double returned [[B:%.*]]) #[[ATTR2]] {
+; IS__CGSCC____-SAME: (i32 [[A:%.*]], double returned [[B:%.*]]) #[[ATTR1]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[DEC:%.*]] = add nsw i32 [[A]], -1
 ; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[A]], 0
@@ -865,7 +886,10 @@ define double @recursion_select_and_phi(i32 %a, double %b) #0 {
 ; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call double @recursion_select_and_phi(i32 [[DEC]], double [[B]]) #[[ATTR7]]
 ; IS__CGSCC____-NEXT:    br label [[IF_END]]
 ; IS__CGSCC____:       if.end:
-; IS__CGSCC____-NEXT:    ret double [[B]]
+; IS__CGSCC____-NEXT:    [[PHI:%.*]] = phi double [ [[CALL]], [[IF_THEN]] ], [ [[B]], [[ENTRY:%.*]] ]
+; IS__CGSCC____-NEXT:    [[CMP1:%.*]] = fcmp oeq double [[B]], 0.000000e+00
+; IS__CGSCC____-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], double [[B]], double [[PHI]]
+; IS__CGSCC____-NEXT:    ret double [[SEL]]
 ;
 entry:
   %dec = add nsw i32 %a, -1
@@ -1183,29 +1207,17 @@ ret_undef1:
 declare i32* @unknown(i32*)
 
 define i32* @ret_arg_or_unknown(i32* %b) #0 {
-; IS__TUNIT____: Function Attrs: noinline nounwind uwtable
-; IS__TUNIT____-LABEL: define {{[^@]+}}@ret_arg_or_unknown
-; IS__TUNIT____-SAME: (i32* [[B:%.*]]) #[[ATTR3]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[CMP:%.*]] = icmp eq i32* [[B]], null
-; IS__TUNIT____-NEXT:    br i1 [[CMP]], label [[RET_ARG:%.*]], label [[RET_UNKNOWN:%.*]]
-; IS__TUNIT____:       ret_arg:
-; IS__TUNIT____-NEXT:    ret i32* [[B]]
-; IS__TUNIT____:       ret_unknown:
-; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32* @unknown(i32* [[B]])
-; IS__TUNIT____-NEXT:    ret i32* [[CALL]]
-;
-; IS__CGSCC____: Function Attrs: noinline nounwind uwtable
-; IS__CGSCC____-LABEL: define {{[^@]+}}@ret_arg_or_unknown
-; IS__CGSCC____-SAME: (i32* [[B:%.*]]) #[[ATTR4]] {
-; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp eq i32* [[B]], null
-; IS__CGSCC____-NEXT:    br i1 [[CMP]], label [[RET_ARG:%.*]], label [[RET_UNKNOWN:%.*]]
-; IS__CGSCC____:       ret_arg:
-; IS__CGSCC____-NEXT:    ret i32* [[B]]
-; IS__CGSCC____:       ret_unknown:
-; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @unknown(i32* [[B]])
-; IS__CGSCC____-NEXT:    ret i32* [[CALL]]
+; CHECK: Function Attrs: noinline nounwind uwtable
+; CHECK-LABEL: define {{[^@]+}}@ret_arg_or_unknown
+; CHECK-SAME: (i32* [[B:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32* [[B]], null
+; CHECK-NEXT:    br i1 [[CMP]], label [[RET_ARG:%.*]], label [[RET_UNKNOWN:%.*]]
+; CHECK:       ret_arg:
+; CHECK-NEXT:    ret i32* [[B]]
+; CHECK:       ret_unknown:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32* @unknown(i32* [[B]])
+; CHECK-NEXT:    ret i32* [[CALL]]
 ;
 entry:
   %cmp = icmp eq i32* %b, null
@@ -1220,35 +1232,20 @@ ret_unknown:
 }
 
 define i32* @ret_arg_or_unknown_through_phi(i32* %b) #0 {
-; IS__TUNIT____: Function Attrs: noinline nounwind uwtable
-; IS__TUNIT____-LABEL: define {{[^@]+}}@ret_arg_or_unknown_through_phi
-; IS__TUNIT____-SAME: (i32* [[B:%.*]]) #[[ATTR3]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[CMP:%.*]] = icmp eq i32* [[B]], null
-; IS__TUNIT____-NEXT:    br i1 [[CMP]], label [[RET_ARG:%.*]], label [[RET_UNKNOWN:%.*]]
-; IS__TUNIT____:       ret_arg:
-; IS__TUNIT____-NEXT:    br label [[R:%.*]]
-; IS__TUNIT____:       ret_unknown:
-; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32* @unknown(i32* [[B]])
-; IS__TUNIT____-NEXT:    br label [[R]]
-; IS__TUNIT____:       r:
-; IS__TUNIT____-NEXT:    [[PHI:%.*]] = phi i32* [ [[B]], [[RET_ARG]] ], [ [[CALL]], [[RET_UNKNOWN]] ]
-; IS__TUNIT____-NEXT:    ret i32* [[PHI]]
-;
-; IS__CGSCC____: Function Attrs: noinline nounwind uwtable
-; IS__CGSCC____-LABEL: define {{[^@]+}}@ret_arg_or_unknown_through_phi
-; IS__CGSCC____-SAME: (i32* [[B:%.*]]) #[[ATTR4]] {
-; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp eq i32* [[B]], null
-; IS__CGSCC____-NEXT:    br i1 [[CMP]], label [[RET_ARG:%.*]], label [[RET_UNKNOWN:%.*]]
-; IS__CGSCC____:       ret_arg:
-; IS__CGSCC____-NEXT:    br label [[R:%.*]]
-; IS__CGSCC____:       ret_unknown:
-; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @unknown(i32* [[B]])
-; IS__CGSCC____-NEXT:    br label [[R]]
-; IS__CGSCC____:       r:
-; IS__CGSCC____-NEXT:    [[PHI:%.*]] = phi i32* [ [[B]], [[RET_ARG]] ], [ [[CALL]], [[RET_UNKNOWN]] ]
-; IS__CGSCC____-NEXT:    ret i32* [[PHI]]
+; CHECK: Function Attrs: noinline nounwind uwtable
+; CHECK-LABEL: define {{[^@]+}}@ret_arg_or_unknown_through_phi
+; CHECK-SAME: (i32* [[B:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32* [[B]], null
+; CHECK-NEXT:    br i1 [[CMP]], label [[RET_ARG:%.*]], label [[RET_UNKNOWN:%.*]]
+; CHECK:       ret_arg:
+; CHECK-NEXT:    br label [[R:%.*]]
+; CHECK:       ret_unknown:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32* @unknown(i32* [[B]])
+; CHECK-NEXT:    br label [[R]]
+; CHECK:       r:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32* [ [[B]], [[RET_ARG]] ], [ [[CALL]], [[RET_UNKNOWN]] ]
+; CHECK-NEXT:    ret i32* [[PHI]]
 ;
 entry:
   %cmp = icmp eq i32* %b, null
@@ -1411,35 +1408,20 @@ r:
 declare void @noreturn() noreturn;
 
 define i32 @deadblockphi3(i32 %A, i1 %c) #0 {
-; IS__TUNIT____: Function Attrs: noinline nounwind uwtable
-; IS__TUNIT____-LABEL: define {{[^@]+}}@deadblockphi3
-; IS__TUNIT____-SAME: (i32 returned [[A:%.*]], i1 [[C:%.*]]) #[[ATTR3]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    br i1 [[C]], label [[R:%.*]], label [[UNREACHABLECALL:%.*]]
-; IS__TUNIT____:       unreachablecall:
-; IS__TUNIT____-NEXT:    call void @noreturn() #[[ATTR4:[0-9]+]]
-; IS__TUNIT____-NEXT:    unreachable
-; IS__TUNIT____:       unreachableblock2:
-; IS__TUNIT____-NEXT:    unreachable
-; IS__TUNIT____:       unreachableblock3:
-; IS__TUNIT____-NEXT:    unreachable
-; IS__TUNIT____:       r:
-; IS__TUNIT____-NEXT:    ret i32 [[A]]
-;
-; IS__CGSCC____: Function Attrs: noinline nounwind uwtable
-; IS__CGSCC____-LABEL: define {{[^@]+}}@deadblockphi3
-; IS__CGSCC____-SAME: (i32 returned [[A:%.*]], i1 [[C:%.*]]) #[[ATTR4]] {
-; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    br i1 [[C]], label [[R:%.*]], label [[UNREACHABLECALL:%.*]]
-; IS__CGSCC____:       unreachablecall:
-; IS__CGSCC____-NEXT:    call void @noreturn() #[[ATTR5:[0-9]+]]
-; IS__CGSCC____-NEXT:    unreachable
-; IS__CGSCC____:       unreachableblock2:
-; IS__CGSCC____-NEXT:    unreachable
-; IS__CGSCC____:       unreachableblock3:
-; IS__CGSCC____-NEXT:    unreachable
-; IS__CGSCC____:       r:
-; IS__CGSCC____-NEXT:    ret i32 [[A]]
+; CHECK: Function Attrs: noinline nounwind uwtable
+; CHECK-LABEL: define {{[^@]+}}@deadblockphi3
+; CHECK-SAME: (i32 returned [[A:%.*]], i1 [[C:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C]], label [[R:%.*]], label [[UNREACHABLECALL:%.*]]
+; CHECK:       unreachablecall:
+; CHECK-NEXT:    call void @noreturn() #[[ATTR4:[0-9]+]]
+; CHECK-NEXT:    unreachable
+; CHECK:       unreachableblock2:
+; CHECK-NEXT:    unreachable
+; CHECK:       unreachableblock3:
+; CHECK-NEXT:    unreachable
+; CHECK:       r:
+; CHECK-NEXT:    ret i32 [[A]]
 ;
 entry:
   br i1 %c, label %r, label %unreachablecall
@@ -1504,10 +1486,10 @@ define i32 @exact(i32* align 8 %a, i32* align 8 %b) {
 ; CHECK-NEXT:    [[C2:%.*]] = call i32 @non_exact_2(i32 noundef 2)
 ; CHECK-NEXT:    [[C3:%.*]] = call align 32 i32* @non_exact_3(i32* align 32 [[A]])
 ; CHECK-NEXT:    [[C4:%.*]] = call align 16 i32* @non_exact_4(i32* align 32 [[B]])
-; CHECK-NEXT:    [[C3L:%.*]] = load i32, i32* [[A]], align 32
+; CHECK-NEXT:    [[C3L:%.*]] = load i32, i32* [[C3]], align 32
 ; CHECK-NEXT:    [[C4L:%.*]] = load i32, i32* [[C4]], align 16
 ; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[C0]], [[C1]]
-; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ADD1]], 2
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 [[ADD1]], [[C2]]
 ; CHECK-NEXT:    [[ADD3:%.*]] = add i32 [[ADD2]], [[C3L]]
 ; CHECK-NEXT:    [[ADD4:%.*]] = add i32 [[ADD3]], [[C4L]]
 ; CHECK-NEXT:    ret i32 [[ADD4]]
@@ -1620,18 +1602,19 @@ attributes #0 = { noinline nounwind uwtable }
 ; IS__TUNIT____: attributes #[[ATTR2]] = { nofree noinline noreturn nosync nounwind readnone uwtable willreturn }
 ; IS__TUNIT____: attributes #[[ATTR3]] = { noinline nounwind uwtable }
 ; IS__TUNIT____: attributes #[[ATTR4]] = { noreturn }
-; IS__TUNIT____: attributes #[[ATTR5:[0-9]+]] = { nofree nosync nounwind readnone willreturn }
+; IS__TUNIT____: attributes #[[ATTR5]] = { nofree nosync nounwind readnone willreturn }
 ; IS__TUNIT____: attributes #[[ATTR6]] = { nofree nosync nounwind readnone }
 ; IS__TUNIT____: attributes #[[ATTR7]] = { nounwind }
 ; IS__TUNIT____: attributes #[[ATTR8:[0-9]+]] = { nounwind readnone }
 ;.
 ; IS__CGSCC____: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone uwtable willreturn }
-; IS__CGSCC____: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone uwtable willreturn }
-; IS__CGSCC____: attributes #[[ATTR2]] = { nofree noinline nosync nounwind readnone uwtable }
-; IS__CGSCC____: attributes #[[ATTR3]] = { nofree noinline norecurse noreturn nosync nounwind readnone uwtable willreturn }
-; IS__CGSCC____: attributes #[[ATTR4]] = { noinline nounwind uwtable }
-; IS__CGSCC____: attributes #[[ATTR5]] = { noreturn }
-; IS__CGSCC____: attributes #[[ATTR6:[0-9]+]] = { nofree norecurse nosync nounwind readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone uwtable }
+; IS__CGSCC____: attributes #[[ATTR2]] = { nofree noinline norecurse noreturn nosync nounwind readnone uwtable willreturn }
+; IS__CGSCC____: attributes #[[ATTR3]] = { noinline nounwind uwtable }
+; IS__CGSCC____: attributes #[[ATTR4]] = { noreturn }
+; IS__CGSCC____: attributes #[[ATTR5:[0-9]+]] = { nofree norecurse nosync nounwind readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR6]] = { readnone willreturn }
 ; IS__CGSCC____: attributes #[[ATTR7]] = { nofree nosync nounwind readnone }
-; IS__CGSCC____: attributes #[[ATTR8]] = { nounwind }
+; IS__CGSCC____: attributes #[[ATTR8]] = { nounwind readnone }
+; IS__CGSCC____: attributes #[[ATTR9]] = { nounwind }
 ;.

diff  --git a/llvm/test/Transforms/Attributor/undefined_behavior.ll b/llvm/test/Transforms/Attributor/undefined_behavior.ll
index 9cf83a9b8146..57064af288f7 100644
--- a/llvm/test/Transforms/Attributor/undefined_behavior.ll
+++ b/llvm/test/Transforms/Attributor/undefined_behavior.ll
@@ -1086,12 +1086,12 @@ define i32 @violate_noundef_nonpointer() {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@violate_noundef_nonpointer
 ; IS__TUNIT____-SAME: () #[[ATTR0]] {
-; IS__TUNIT____-NEXT:    ret i32 undef
+; IS__TUNIT____-NEXT:    unreachable
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@violate_noundef_nonpointer
 ; IS__CGSCC____-SAME: () #[[ATTR0]] {
-; IS__CGSCC____-NEXT:    ret i32 undef
+; IS__CGSCC____-NEXT:    unreachable
 ;
   %ret = call i32 @argument_noundef1(i32 undef)
   ret i32 %ret
@@ -1115,12 +1115,12 @@ define i32* @violate_noundef_pointer() {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@violate_noundef_pointer
 ; IS__TUNIT____-SAME: () #[[ATTR0]] {
-; IS__TUNIT____-NEXT:    ret i32* undef
+; IS__TUNIT____-NEXT:    unreachable
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@violate_noundef_pointer
 ; IS__CGSCC____-SAME: () #[[ATTR0]] {
-; IS__CGSCC____-NEXT:    ret i32* undef
+; IS__CGSCC____-NEXT:    unreachable
 ;
   %ret = call i32* @argument_noundef2(i32* undef)
   ret i32* %ret

diff  --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll
index 2ad3ef204321..cf380bd123e1 100644
--- a/llvm/test/Transforms/Attributor/value-simplify.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify.ll
@@ -388,9 +388,9 @@ f:
 }
 
 define i32 @ipccp4(i1 %c) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@ipccp4
-; IS__TUNIT____-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; IS__TUNIT____-SAME: (i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] {
 ; IS__TUNIT____-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__TUNIT____:       t:
 ; IS__TUNIT____-NEXT:    br label [[F]]
@@ -433,14 +433,21 @@ define internal i32* @test_inalloca(i32* inalloca(i32) %a) {
 define i32* @complicated_args_inalloca(i32* %arg) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@complicated_args_inalloca
-; IS__TUNIT____-SAME: (i32* nofree readnone "no-capture-maybe-returned" [[ARG:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call nonnull dereferenceable(4) i32* @test_inalloca(i32* noalias nofree writeonly inalloca(i32) "no-capture-maybe-returned" [[ARG]]) #[[ATTR1]]
+; IS__TUNIT____-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[ARG:%.*]]) #[[ATTR1]] {
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nofree writeonly inalloca(i32) "no-capture-maybe-returned" [[ARG]]) #[[ATTR1]]
 ; IS__TUNIT____-NEXT:    ret i32* [[CALL]]
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@complicated_args_inalloca
-; IS__CGSCC____-SAME: (i32* nofree noundef nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[ARG:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret i32* [[ARG]]
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@complicated_args_inalloca
+; IS__CGSCC_OPM-SAME: (i32* nofree noundef nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[ARG:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_OPM-NEXT:    [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nofree noundef nonnull writeonly inalloca(i32) dereferenceable(4) "no-capture-maybe-returned" [[ARG]]) #[[ATTR7:[0-9]+]]
+; IS__CGSCC_OPM-NEXT:    ret i32* [[CALL]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@complicated_args_inalloca
+; IS__CGSCC_NPM-SAME: (i32* nofree noundef nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[ARG:%.*]]) #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nofree noundef nonnull writeonly inalloca(i32) dereferenceable(4) "no-capture-maybe-returned" [[ARG]]) #[[ATTR6:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    ret i32* [[CALL]]
 ;
   %call = call i32* @test_inalloca(i32* inalloca(i32) %arg)
   ret i32* %call
@@ -464,27 +471,29 @@ define i32* @complicated_args_preallocated() {
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@complicated_args_preallocated
 ; IS__TUNIT_OPM-SAME: () #[[ATTR0:[0-9]+]] {
 ; IS__TUNIT_OPM-NEXT:    [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) #[[ATTR7:[0-9]+]]
-; IS__TUNIT_OPM-NEXT:    [[CALL:%.*]] = call noundef nonnull align 536870912 dereferenceable(4) i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) #[[ATTR1]] [ "preallocated"(token [[C]]) ]
+; IS__TUNIT_OPM-NEXT:    [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) #[[ATTR1]] [ "preallocated"(token [[C]]) ]
 ; IS__TUNIT_OPM-NEXT:    ret i32* [[CALL]]
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@complicated_args_preallocated
 ; IS__TUNIT_NPM-SAME: () #[[ATTR0:[0-9]+]] {
 ; IS__TUNIT_NPM-NEXT:    [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) #[[ATTR6:[0-9]+]]
-; IS__TUNIT_NPM-NEXT:    [[CALL:%.*]] = call noundef nonnull align 536870912 dereferenceable(4) i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) #[[ATTR1]] [ "preallocated"(token [[C]]) ]
+; IS__TUNIT_NPM-NEXT:    [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) #[[ATTR1]] [ "preallocated"(token [[C]]) ]
 ; IS__TUNIT_NPM-NEXT:    ret i32* [[CALL]]
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree nosync nounwind willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@complicated_args_preallocated
 ; IS__CGSCC_OPM-SAME: () #[[ATTR0:[0-9]+]] {
-; IS__CGSCC_OPM-NEXT:    [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) #[[ATTR7:[0-9]+]]
-; IS__CGSCC_OPM-NEXT:    ret i32* null
+; IS__CGSCC_OPM-NEXT:    [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) #[[ATTR8:[0-9]+]]
+; IS__CGSCC_OPM-NEXT:    [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) #[[ATTR9:[0-9]+]] [ "preallocated"(token [[C]]) ]
+; IS__CGSCC_OPM-NEXT:    ret i32* [[CALL]]
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree nosync nounwind willreturn
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@complicated_args_preallocated
 ; IS__CGSCC_NPM-SAME: () #[[ATTR0:[0-9]+]] {
-; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) #[[ATTR6:[0-9]+]]
-; IS__CGSCC_NPM-NEXT:    ret i32* null
+; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 noundef 1) #[[ATTR7:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree noundef writeonly preallocated(i32) align 536870912 null) #[[ATTR8:[0-9]+]] [ "preallocated"(token [[C]]) ]
+; IS__CGSCC_NPM-NEXT:    ret i32* [[CALL]]
 ;
   %c = call token @llvm.call.preallocated.setup(i32 1)
   %call = call i32* @test_preallocated(i32* preallocated(i32) null) ["preallocated"(token %c)]
@@ -495,7 +504,7 @@ define internal void @test_sret(%struct.X* sret(%struct.X) %a, %struct.X** %b) {
 ;
 ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_sret
-; IS__TUNIT____-SAME: (%struct.X* noalias nofree noundef nonnull writeonly sret([[STRUCT_X:%.*]]) align 536870912 dereferenceable(8) [[A:%.*]], %struct.X** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR2:[0-9]+]] {
+; IS__TUNIT____-SAME: (%struct.X* noalias nofree noundef nonnull writeonly sret([[STRUCT_X:%.*]]) align 536870912 dereferenceable(8) [[A:%.*]], %struct.X** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR3:[0-9]+]] {
 ; IS__TUNIT____-NEXT:    store %struct.X* [[A]], %struct.X** [[B]], align 8
 ; IS__TUNIT____-NEXT:    ret void
 ;
@@ -513,14 +522,14 @@ define void @complicated_args_sret(%struct.X** %b) {
 ;
 ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@complicated_args_sret
-; IS__TUNIT_OPM-SAME: (%struct.X** nocapture nofree writeonly [[B:%.*]]) #[[ATTR2]] {
-; IS__TUNIT_OPM-NEXT:    call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret([[STRUCT_X:%.*]]) align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR5:[0-9]+]]
+; IS__TUNIT_OPM-SAME: (%struct.X** nocapture nofree writeonly [[B:%.*]]) #[[ATTR3]] {
+; IS__TUNIT_OPM-NEXT:    call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret([[STRUCT_X:%.*]]) align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR6:[0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
 ; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@complicated_args_sret
-; IS__TUNIT_NPM-SAME: (%struct.X** nocapture nofree writeonly [[B:%.*]]) #[[ATTR2]] {
-; IS__TUNIT_NPM-NEXT:    call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret([[STRUCT_X:%.*]]) align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR4:[0-9]+]]
+; IS__TUNIT_NPM-SAME: (%struct.X** nocapture nofree writeonly [[B:%.*]]) #[[ATTR3]] {
+; IS__TUNIT_NPM-NEXT:    call void @test_sret(%struct.X* noalias nocapture nofree noundef writeonly sret([[STRUCT_X:%.*]]) align 536870912 null, %struct.X** nocapture nofree writeonly align 8 [[B]]) #[[ATTR5:[0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn writeonly
@@ -533,10 +542,15 @@ define void @complicated_args_sret(%struct.X** %b) {
 }
 
 define internal %struct.X* @test_nest(%struct.X* nest %a) {
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test_nest
+; IS__TUNIT____-SAME: (%struct.X* nest noalias nofree noundef readnone returned align 536870912 "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR1]] {
+; IS__TUNIT____-NEXT:    ret %struct.X* [[A]]
+;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_nest
-; IS__CGSCC____-SAME: (%struct.X* nest noalias nocapture nofree readnone align 536870912 [[A:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret %struct.X* undef
+; IS__CGSCC____-SAME: (%struct.X* nest noalias nofree noundef readnone returned align 536870912 "no-capture-maybe-returned" [[A:%.*]]) #[[ATTR1]] {
+; IS__CGSCC____-NEXT:    ret %struct.X* [[A]]
 ;
   ret %struct.X* %a
 }
@@ -544,12 +558,20 @@ define %struct.X* @complicated_args_nest() {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@complicated_args_nest
 ; IS__TUNIT____-SAME: () #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    ret %struct.X* null
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree noundef readnone align 536870912 null) #[[ATTR1]]
+; IS__TUNIT____-NEXT:    ret %struct.X* [[CALL]]
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@complicated_args_nest
-; IS__CGSCC____-SAME: () #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    ret %struct.X* null
+; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@complicated_args_nest
+; IS__CGSCC_OPM-SAME: () #[[ATTR1]] {
+; IS__CGSCC_OPM-NEXT:    [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree noundef readnone align 536870912 null) #[[ATTR7]]
+; IS__CGSCC_OPM-NEXT:    ret %struct.X* [[CALL]]
+;
+; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@complicated_args_nest
+; IS__CGSCC_NPM-SAME: () #[[ATTR1]] {
+; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call %struct.X* @test_nest(%struct.X* noalias nocapture nofree noundef readnone align 536870912 null) #[[ATTR6]]
+; IS__CGSCC_NPM-NEXT:    ret %struct.X* [[CALL]]
 ;
   %call = call %struct.X* @test_nest(%struct.X* null)
   ret %struct.X* %call
@@ -601,7 +623,7 @@ define void @complicated_args_byval() {
 define internal i8*@test_byval2(%struct.X* byval(%struct.X) %a) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readonly willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_byval2
-; IS__TUNIT____-SAME: () #[[ATTR3:[0-9]+]] {
+; IS__TUNIT____-SAME: () #[[ATTR4:[0-9]+]] {
 ; IS__TUNIT____-NEXT:    [[L:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT_X:%.*]], %struct.X* @S, i32 0, i32 0), align 8
 ; IS__TUNIT____-NEXT:    ret i8* [[L]]
 ;
@@ -616,23 +638,22 @@ define internal i8*@test_byval2(%struct.X* byval(%struct.X) %a) {
   ret i8* %l
 }
 define i8* @complicated_args_byval2() {
-;
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readonly willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@complicated_args_byval2
-; IS__TUNIT____-SAME: () #[[ATTR3]] {
-; IS__TUNIT____-NEXT:    [[C:%.*]] = call i8* @test_byval2() #[[ATTR3]]
+; IS__TUNIT____-SAME: () #[[ATTR4]] {
+; IS__TUNIT____-NEXT:    [[C:%.*]] = call i8* @test_byval2() #[[ATTR4]]
 ; IS__TUNIT____-NEXT:    ret i8* [[C]]
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@complicated_args_byval2
 ; IS__CGSCC_OPM-SAME: () #[[ATTR3]] {
-; IS__CGSCC_OPM-NEXT:    [[C:%.*]] = call i8* @test_byval2() #[[ATTR8:[0-9]+]]
+; IS__CGSCC_OPM-NEXT:    [[C:%.*]] = call i8* @test_byval2() #[[ATTR10:[0-9]+]]
 ; IS__CGSCC_OPM-NEXT:    ret i8* [[C]]
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@complicated_args_byval2
 ; IS__CGSCC_NPM-SAME: () #[[ATTR3]] {
-; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = call i8* @test_byval2() #[[ATTR7:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = call i8* @test_byval2() #[[ATTR9:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    ret i8* [[C]]
 ;
   %c = call i8* @test_byval2(%struct.X* byval(%struct.X) @S)
@@ -642,7 +663,7 @@ define i8* @complicated_args_byval2() {
 define void @fixpoint_changed(i32* %p) {
 ; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind writeonly
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@fixpoint_changed
-; IS__TUNIT_OPM-SAME: (i32* nocapture nofree writeonly [[P:%.*]]) #[[ATTR4:[0-9]+]] {
+; IS__TUNIT_OPM-SAME: (i32* nocapture nofree writeonly [[P:%.*]]) #[[ATTR5:[0-9]+]] {
 ; IS__TUNIT_OPM-NEXT:  entry:
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_COND:%.*]]
 ; IS__TUNIT_OPM:       for.cond:
@@ -665,7 +686,7 @@ define void @fixpoint_changed(i32* %p) {
 ;
 ; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@fixpoint_changed
-; IS__TUNIT_NPM-SAME: (i32* nocapture nofree writeonly [[P:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_NPM-SAME: (i32* nocapture nofree writeonly [[P:%.*]]) #[[ATTR3]] {
 ; IS__TUNIT_NPM-NEXT:  entry:
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND:%.*]]
 ; IS__TUNIT_NPM:       for.cond:
@@ -856,13 +877,13 @@ define internal i8 @callee(i8 %a) {
 define void @user_as3() {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@user_as3
-; IS__TUNIT_OPM-SAME: () #[[ATTR5]] {
+; IS__TUNIT_OPM-SAME: () #[[ATTR6]] {
 ; IS__TUNIT_OPM-NEXT:    store i32 0, i32 addrspace(3)* @ConstAS3Ptr, align 4
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@user_as3
-; IS__TUNIT_NPM-SAME: () #[[ATTR4]] {
+; IS__TUNIT_NPM-SAME: () #[[ATTR5]] {
 ; IS__TUNIT_NPM-NEXT:    store i32 0, i32 addrspace(3)* @ConstAS3Ptr, align 4
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
@@ -885,13 +906,13 @@ define void @user_as3() {
 define void @user() {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@user
-; IS__TUNIT_OPM-SAME: () #[[ATTR5]] {
+; IS__TUNIT_OPM-SAME: () #[[ATTR6]] {
 ; IS__TUNIT_OPM-NEXT:    store i32 0, i32* addrspacecast (i32 addrspace(3)* @ConstAS3Ptr to i32*), align 4
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@user
-; IS__TUNIT_NPM-SAME: () #[[ATTR4]] {
+; IS__TUNIT_NPM-SAME: () #[[ATTR5]] {
 ; IS__TUNIT_NPM-NEXT:    store i32 0, i32* addrspacecast (i32 addrspace(3)* @ConstAS3Ptr to i32*), align 4
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
@@ -914,17 +935,11 @@ define void @user() {
 
 
 define i1 @test_merge_with_undef_values_ptr(i1 %c) {
-; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test_merge_with_undef_values_ptr
-; IS__TUNIT_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR6:[0-9]+]] {
-; IS__TUNIT_OPM-NEXT:    [[R1:%.*]] = call i1 @undef_then_null(i1 [[C]]) #[[ATTR6]]
-; IS__TUNIT_OPM-NEXT:    ret i1 [[R1]]
-;
-; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test_merge_with_undef_values_ptr
-; IS__TUNIT_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR5:[0-9]+]] {
-; IS__TUNIT_NPM-NEXT:    [[R1:%.*]] = call i1 @undef_then_null(i1 [[C]]) #[[ATTR5]]
-; IS__TUNIT_NPM-NEXT:    ret i1 [[R1]]
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test_merge_with_undef_values_ptr
+; IS__TUNIT____-SAME: (i1 [[C:%.*]]) #[[ATTR2]] {
+; IS__TUNIT____-NEXT:    [[R1:%.*]] = call i1 @undef_then_null(i1 [[C]]) #[[ATTR2]]
+; IS__TUNIT____-NEXT:    ret i1 [[R1]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_merge_with_undef_values_ptr
@@ -935,31 +950,18 @@ define i1 @test_merge_with_undef_values_ptr(i1 %c) {
   ret i1 %r1
 }
 define internal i1 @undef_then_null(i1 %c, i32* %i32Aptr, i32* %i32Bptr) {
-; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@undef_then_null
-; IS__TUNIT_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR6]] {
-; IS__TUNIT_OPM-NEXT:    [[CMP1:%.*]] = icmp eq i32* null, null
-; IS__TUNIT_OPM-NEXT:    [[CMP2:%.*]] = icmp eq i1 [[CMP1]], false
-; IS__TUNIT_OPM-NEXT:    [[OR:%.*]] = or i1 [[CMP2]], [[C]]
-; IS__TUNIT_OPM-NEXT:    br i1 [[OR]], label [[A:%.*]], label [[B:%.*]]
-; IS__TUNIT_OPM:       a:
-; IS__TUNIT_OPM-NEXT:    [[R2:%.*]] = call i1 @undef_then_null(i1 noundef false) #[[ATTR6]]
-; IS__TUNIT_OPM-NEXT:    ret i1 [[R2]]
-; IS__TUNIT_OPM:       b:
-; IS__TUNIT_OPM-NEXT:    ret i1 [[CMP2]]
-;
-; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@undef_then_null
-; IS__TUNIT_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR5]] {
-; IS__TUNIT_NPM-NEXT:    [[CMP1:%.*]] = icmp eq i32* null, null
-; IS__TUNIT_NPM-NEXT:    [[CMP2:%.*]] = icmp eq i1 [[CMP1]], false
-; IS__TUNIT_NPM-NEXT:    [[OR:%.*]] = or i1 [[CMP2]], [[C]]
-; IS__TUNIT_NPM-NEXT:    br i1 [[OR]], label [[A:%.*]], label [[B:%.*]]
-; IS__TUNIT_NPM:       a:
-; IS__TUNIT_NPM-NEXT:    [[R2:%.*]] = call i1 @undef_then_null(i1 noundef false) #[[ATTR5]]
-; IS__TUNIT_NPM-NEXT:    ret i1 [[R2]]
-; IS__TUNIT_NPM:       b:
-; IS__TUNIT_NPM-NEXT:    ret i1 [[CMP2]]
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone
+; IS__TUNIT____-LABEL: define {{[^@]+}}@undef_then_null
+; IS__TUNIT____-SAME: (i1 [[C:%.*]]) #[[ATTR2]] {
+; IS__TUNIT____-NEXT:    [[CMP1:%.*]] = icmp eq i32* null, null
+; IS__TUNIT____-NEXT:    [[CMP2:%.*]] = icmp eq i1 [[CMP1]], false
+; IS__TUNIT____-NEXT:    [[OR:%.*]] = or i1 [[CMP2]], [[C]]
+; IS__TUNIT____-NEXT:    br i1 [[OR]], label [[A:%.*]], label [[B:%.*]]
+; IS__TUNIT____:       a:
+; IS__TUNIT____-NEXT:    [[R2:%.*]] = call i1 @undef_then_null(i1 noundef false) #[[ATTR2]]
+; IS__TUNIT____-NEXT:    ret i1 [[R2]]
+; IS__TUNIT____:       b:
+; IS__TUNIT____-NEXT:    ret i1 [[CMP2]]
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@undef_then_null
@@ -969,7 +971,7 @@ define internal i1 @undef_then_null(i1 %c, i32* %i32Aptr, i32* %i32Bptr) {
 ; IS__CGSCC_OPM-NEXT:    [[OR:%.*]] = or i1 false, [[C]]
 ; IS__CGSCC_OPM-NEXT:    br i1 [[OR]], label [[A:%.*]], label [[B:%.*]]
 ; IS__CGSCC_OPM:       a:
-; IS__CGSCC_OPM-NEXT:    [[R2:%.*]] = call i1 @undef_then_null(i1 noundef false) #[[ATTR9:[0-9]+]]
+; IS__CGSCC_OPM-NEXT:    [[R2:%.*]] = call i1 @undef_then_null(i1 noundef false) #[[ATTR11:[0-9]+]]
 ; IS__CGSCC_OPM-NEXT:    ret i1 undef
 ; IS__CGSCC_OPM:       b:
 ; IS__CGSCC_OPM-NEXT:    ret i1 undef
@@ -982,7 +984,7 @@ define internal i1 @undef_then_null(i1 %c, i32* %i32Aptr, i32* %i32Bptr) {
 ; IS__CGSCC_NPM-NEXT:    [[OR:%.*]] = or i1 false, [[C]]
 ; IS__CGSCC_NPM-NEXT:    br i1 [[OR]], label [[A:%.*]], label [[B:%.*]]
 ; IS__CGSCC_NPM:       a:
-; IS__CGSCC_NPM-NEXT:    [[R2:%.*]] = call i1 @undef_then_null(i1 noundef false) #[[ATTR8:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[R2:%.*]] = call i1 @undef_then_null(i1 noundef false) #[[ATTR10:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    ret i1 undef
 ; IS__CGSCC_NPM:       b:
 ; IS__CGSCC_NPM-NEXT:    ret i1 undef
@@ -999,9 +1001,10 @@ b:
 }
 
 define i1 @test_merge_with_undef_values(i1 %c) {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_merge_with_undef_values
-; IS__TUNIT____-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
+; IS__TUNIT____-SAME: (i1 [[C:%.*]]) #[[ATTR2]] {
+; IS__TUNIT____-NEXT:    [[R1:%.*]] = call noundef i1 @undef_then_1(i1 [[C]]) #[[ATTR2]]
 ; IS__TUNIT____-NEXT:    ret i1 false
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
@@ -1013,16 +1016,38 @@ define i1 @test_merge_with_undef_values(i1 %c) {
   ret i1 %r1
 }
 define internal i1 @undef_then_1(i1 %c, i32 %i32A, i32 %i32B) {
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone
+; IS__TUNIT____-LABEL: define {{[^@]+}}@undef_then_1
+; IS__TUNIT____-SAME: (i1 [[C:%.*]]) #[[ATTR2]] {
+; IS__TUNIT____-NEXT:    [[OR:%.*]] = or i1 false, [[C]]
+; IS__TUNIT____-NEXT:    br i1 [[OR]], label [[A:%.*]], label [[B:%.*]]
+; IS__TUNIT____:       a:
+; IS__TUNIT____-NEXT:    [[R2:%.*]] = call noundef i1 @undef_then_1(i1 noundef false) #[[ATTR2]]
+; IS__TUNIT____-NEXT:    ret i1 false
+; IS__TUNIT____:       b:
+; IS__TUNIT____-NEXT:    ret i1 false
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@undef_then_1
-; IS__CGSCC____-SAME: (i1 [[C:%.*]]) #[[ATTR1]] {
-; IS__CGSCC____-NEXT:    [[OR:%.*]] = or i1 false, [[C]]
-; IS__CGSCC____-NEXT:    br i1 [[OR]], label [[A:%.*]], label [[B:%.*]]
-; IS__CGSCC____:       a:
-; IS__CGSCC____-NEXT:    ret i1 undef
-; IS__CGSCC____:       b:
-; IS__CGSCC____-NEXT:    ret i1 undef
+; IS__CGSCC_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@undef_then_1
+; IS__CGSCC_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR6]] {
+; IS__CGSCC_OPM-NEXT:    [[OR:%.*]] = or i1 false, [[C]]
+; IS__CGSCC_OPM-NEXT:    br i1 [[OR]], label [[A:%.*]], label [[B:%.*]]
+; IS__CGSCC_OPM:       a:
+; IS__CGSCC_OPM-NEXT:    [[R2:%.*]] = call noundef i1 @undef_then_1(i1 noundef false) #[[ATTR11]]
+; IS__CGSCC_OPM-NEXT:    ret i1 undef
+; IS__CGSCC_OPM:       b:
+; IS__CGSCC_OPM-NEXT:    ret i1 undef
+;
+; IS__CGSCC_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@undef_then_1
+; IS__CGSCC_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR5]] {
+; IS__CGSCC_NPM-NEXT:    [[OR:%.*]] = or i1 false, [[C]]
+; IS__CGSCC_NPM-NEXT:    br i1 [[OR]], label [[A:%.*]], label [[B:%.*]]
+; IS__CGSCC_NPM:       a:
+; IS__CGSCC_NPM-NEXT:    [[R2:%.*]] = call noundef i1 @undef_then_1(i1 noundef false) #[[ATTR10]]
+; IS__CGSCC_NPM-NEXT:    ret i1 undef
+; IS__CGSCC_NPM:       b:
+; IS__CGSCC_NPM-NEXT:    ret i1 undef
 ;
   %cmp1 = icmp eq i32 %i32A, %i32B
   %cmp2 = icmp eq i1 %cmp1, false
@@ -1039,7 +1064,8 @@ define i32 @test_select(i32 %c) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_select
 ; IS__TUNIT____-SAME: (i32 [[C:%.*]]) #[[ATTR1]] {
-; IS__TUNIT____-NEXT:    ret i32 42
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32 @select() #[[ATTR1]]
+; IS__TUNIT____-NEXT:    ret i32 [[CALL]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_select
@@ -1051,6 +1077,11 @@ define i32 @test_select(i32 %c) {
 }
 
 define internal i32 @select(i1 %a, i32 %b, i32 %c) {
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT____-LABEL: define {{[^@]+}}@select
+; IS__TUNIT____-SAME: () #[[ATTR1]] {
+; IS__TUNIT____-NEXT:    ret i32 42
+;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@select
 ; IS__CGSCC____-SAME: () #[[ATTR1]] {
@@ -1079,7 +1110,7 @@ define void @test_callee_is_undef(void (i32)* %fn) {
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@test_callee_is_undef
 ; IS__TUNIT____-SAME: (void (i32)* nocapture nofree [[FN:%.*]]) {
 ; IS__TUNIT____-NEXT:    call void @callee_is_undef()
-; IS__TUNIT____-NEXT:    call void @unknown_calle_arg_is_undef(void (i32)* nocapture nofree [[FN]])
+; IS__TUNIT____-NEXT:    call void @unknown_calle_arg_is_undef(void (i32)* nocapture nofree [[FN]], i32 undef)
 ; IS__TUNIT____-NEXT:    ret void
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_callee_is_undef
@@ -1097,7 +1128,7 @@ define internal void @callee_is_undef(void ()* %fn) {
 ; IS__TUNIT____-NEXT:    ret void
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@callee_is_undef
-; IS__CGSCC____-SAME: (void ()* nocapture nofree noundef nonnull [[FN:%.*]]) {
+; IS__CGSCC____-SAME: (void ()* noalias nocapture nofree noundef nonnull [[FN:%.*]]) {
 ; IS__CGSCC____-NEXT:    call void [[FN]]()
 ; IS__CGSCC____-NEXT:    ret void
 ;
@@ -1106,10 +1137,15 @@ define internal void @callee_is_undef(void ()* %fn) {
 }
 define internal void @unknown_calle_arg_is_undef(void (i32)* %fn, i32 %arg) {
 ;
-; CHECK-LABEL: define {{[^@]+}}@unknown_calle_arg_is_undef
-; CHECK-SAME: (void (i32)* nocapture nofree noundef nonnull [[FN:%.*]]) {
-; CHECK-NEXT:    call void [[FN]](i32 undef)
-; CHECK-NEXT:    ret void
+; IS__TUNIT____-LABEL: define {{[^@]+}}@unknown_calle_arg_is_undef
+; IS__TUNIT____-SAME: (void (i32)* nocapture nofree noundef nonnull [[FN:%.*]], i32 [[ARG:%.*]]) {
+; IS__TUNIT____-NEXT:    call void [[FN]](i32 undef)
+; IS__TUNIT____-NEXT:    ret void
+;
+; IS__CGSCC____-LABEL: define {{[^@]+}}@unknown_calle_arg_is_undef
+; IS__CGSCC____-SAME: (void (i32)* nocapture nofree noundef nonnull [[FN:%.*]]) {
+; IS__CGSCC____-NEXT:    call void [[FN]](i32 undef)
+; IS__CGSCC____-NEXT:    ret void
 ;
   call void %fn(i32 %arg)
   ret void
@@ -1123,7 +1159,7 @@ define internal void @unknown_calle_arg_is_undef(void (i32)* %fn, i32 %arg) {
 define internal void @f1(i8*** %a) {
 ; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@f1
-; IS__TUNIT____-SAME: (i8*** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR2]] {
+; IS__TUNIT____-SAME: (i8*** nocapture nofree noundef nonnull writeonly align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR3]] {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    [[X:%.*]] = getelementptr { [2 x i8*] }, { [2 x i8*] }* @g, i32 0, i32 0, i32 0
 ; IS__TUNIT____-NEXT:    store i8** [[X]], i8*** [[A]], align 8
@@ -1218,97 +1254,22 @@ define internal i1 @cmp_null_after_cast(i32 %a, i8 %b) {
   ret i1 %c
 }
 
-
-declare i8* @m()
-
-define i32 @test(i1 %c) {
-; CHECK-LABEL: define {{[^@]+}}@test
-; CHECK-SAME: (i1 [[C:%.*]]) {
-; CHECK-NEXT:    [[R1:%.*]] = call i32 @ctx_test1(i1 [[C]])
-; CHECK-NEXT:    [[R2:%.*]] = call i32 @ctx_test2(i1 [[C]]), !range [[RNG0:![0-9]+]]
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[R1]], [[R2]]
-; CHECK-NEXT:    ret i32 [[ADD]]
-;
-  %r1 = call i32 @ctx_test1(i1 %c)
-  %r2 = call i32 @ctx_test2(i1 %c)
-  %add = add i32 %r1, %r2
-  ret i32 %add
-}
-
-define internal i32 @ctx_test1(i1 %c) {
-; CHECK-LABEL: define {{[^@]+}}@ctx_test1
-; CHECK-SAME: (i1 [[C:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[C]], label [[THEN:%.*]], label [[JOIN:%.*]]
-; CHECK:       then:
-; CHECK-NEXT:    [[M:%.*]] = tail call i8* @m()
-; CHECK-NEXT:    [[I:%.*]] = ptrtoint i8* [[M]] to i64
-; CHECK-NEXT:    br label [[JOIN]]
-; CHECK:       join:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i64 [ [[I]], [[THEN]] ], [ undef, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[RET:%.*]] = trunc i64 [[PHI]] to i32
-; CHECK-NEXT:    ret i32 [[RET]]
-;
-entry:
-  br i1 %c, label %then, label %join
-
-then:
-  %m = tail call i8* @m()
-  %i = ptrtoint i8* %m to i64
-  br label %join
-
-join:
-  %phi = phi i64 [ %i, %then ], [ undef, %entry ]
-  %ret = trunc i64 %phi to i32
-  ret i32 %ret
-}
-
-define internal i32 @ctx_test2(i1 %c) {
-; CHECK-LABEL: define {{[^@]+}}@ctx_test2
-; CHECK-SAME: (i1 [[C:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 [[C]], label [[THEN:%.*]], label [[JOIN:%.*]]
-; CHECK:       then:
-; CHECK-NEXT:    [[M:%.*]] = tail call i8* @m()
-; CHECK-NEXT:    [[I:%.*]] = ptrtoint i8* [[M]] to i32
-; CHECK-NEXT:    br label [[JOIN]]
-; CHECK:       join:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[I]], [[THEN]] ], [ undef, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[PHI]], 1
-; CHECK-NEXT:    ret i32 [[RET]]
-;
-entry:
-  br i1 %c, label %then, label %join
-
-then:
-  %m = tail call i8* @m()
-  %i = ptrtoint i8* %m to i32
-  br label %join
-
-join:
-  %phi = phi i32 [ %i, %then ], [ undef, %entry ]
-  %ret = lshr i32 %phi, 1
-  ret i32 %ret
-
-  uselistorder label %join, { 1, 0 }
-}
-
 ;.
 ; IS__TUNIT_OPM: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn }
 ; IS__TUNIT_OPM: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
-; IS__TUNIT_OPM: attributes #[[ATTR2]] = { argmemonly nofree nosync nounwind willreturn writeonly }
-; IS__TUNIT_OPM: attributes #[[ATTR3]] = { nofree nosync nounwind readonly willreturn }
-; IS__TUNIT_OPM: attributes #[[ATTR4]] = { argmemonly nofree nosync nounwind writeonly }
-; IS__TUNIT_OPM: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn writeonly }
-; IS__TUNIT_OPM: attributes #[[ATTR6]] = { nofree nosync nounwind readnone }
+; IS__TUNIT_OPM: attributes #[[ATTR2]] = { nofree nosync nounwind readnone }
+; IS__TUNIT_OPM: attributes #[[ATTR3]] = { argmemonly nofree nosync nounwind willreturn writeonly }
+; IS__TUNIT_OPM: attributes #[[ATTR4]] = { nofree nosync nounwind readonly willreturn }
+; IS__TUNIT_OPM: attributes #[[ATTR5]] = { argmemonly nofree nosync nounwind writeonly }
+; IS__TUNIT_OPM: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn writeonly }
 ; IS__TUNIT_OPM: attributes #[[ATTR7]] = { willreturn }
 ;.
 ; IS__TUNIT_NPM: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn }
 ; IS__TUNIT_NPM: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
-; IS__TUNIT_NPM: attributes #[[ATTR2]] = { argmemonly nofree nosync nounwind willreturn writeonly }
-; IS__TUNIT_NPM: attributes #[[ATTR3]] = { nofree nosync nounwind readonly willreturn }
-; IS__TUNIT_NPM: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn writeonly }
-; IS__TUNIT_NPM: attributes #[[ATTR5]] = { nofree nosync nounwind readnone }
+; IS__TUNIT_NPM: attributes #[[ATTR2]] = { nofree nosync nounwind readnone }
+; IS__TUNIT_NPM: attributes #[[ATTR3]] = { argmemonly nofree nosync nounwind willreturn writeonly }
+; IS__TUNIT_NPM: attributes #[[ATTR4]] = { nofree nosync nounwind readonly willreturn }
+; IS__TUNIT_NPM: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn writeonly }
 ; IS__TUNIT_NPM: attributes #[[ATTR6]] = { willreturn }
 ;.
 ; IS__CGSCC_OPM: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn }
@@ -1318,9 +1279,11 @@ join:
 ; IS__CGSCC_OPM: attributes #[[ATTR4]] = { argmemonly nofree norecurse nosync nounwind writeonly }
 ; IS__CGSCC_OPM: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn writeonly }
 ; IS__CGSCC_OPM: attributes #[[ATTR6]] = { nofree nosync nounwind readnone willreturn }
-; IS__CGSCC_OPM: attributes #[[ATTR7]] = { willreturn }
-; IS__CGSCC_OPM: attributes #[[ATTR8]] = { readonly willreturn }
-; IS__CGSCC_OPM: attributes #[[ATTR9]] = { nofree nosync nounwind readnone }
+; IS__CGSCC_OPM: attributes #[[ATTR7]] = { readnone willreturn }
+; IS__CGSCC_OPM: attributes #[[ATTR8]] = { willreturn }
+; IS__CGSCC_OPM: attributes #[[ATTR9]] = { nounwind readnone willreturn }
+; IS__CGSCC_OPM: attributes #[[ATTR10]] = { readonly willreturn }
+; IS__CGSCC_OPM: attributes #[[ATTR11]] = { nofree nosync nounwind readnone }
 ;.
 ; IS__CGSCC_NPM: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn }
 ; IS__CGSCC_NPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn }
@@ -1328,9 +1291,9 @@ join:
 ; IS__CGSCC_NPM: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind readonly willreturn }
 ; IS__CGSCC_NPM: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn writeonly }
 ; IS__CGSCC_NPM: attributes #[[ATTR5]] = { nofree nosync nounwind readnone willreturn }
-; IS__CGSCC_NPM: attributes #[[ATTR6]] = { willreturn }
-; IS__CGSCC_NPM: attributes #[[ATTR7]] = { readonly willreturn }
-; IS__CGSCC_NPM: attributes #[[ATTR8]] = { nofree nosync nounwind readnone }
-;.
-; CHECK: [[RNG0]] = !{i32 0, i32 -2147483648}
+; IS__CGSCC_NPM: attributes #[[ATTR6]] = { readnone willreturn }
+; IS__CGSCC_NPM: attributes #[[ATTR7]] = { willreturn }
+; IS__CGSCC_NPM: attributes #[[ATTR8]] = { nounwind readnone willreturn }
+; IS__CGSCC_NPM: attributes #[[ATTR9]] = { readonly willreturn }
+; IS__CGSCC_NPM: attributes #[[ATTR10]] = { nofree nosync nounwind readnone }
 ;.

diff  --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
deleted file mode 100644
index f2e5e59ad5dc..000000000000
--- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll
+++ /dev/null
@@ -1,1890 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --include-generated-funcs
-; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
-
-;; void p0(void);
-;; void p1(void);
-;; int unknown(void);
-;; void unknown_pure(void) __attribute__((pure));
-;; void unknown_no_openmp(void) __attribute__((assume("omp_no_openmp")));
-;;
-;; int G;
-;; void no_parallel_region_in_here(void) {
-;; #pragma omp single
-;;   G = 0;
-;; }
-;;
-;; void no_state_machine_needed() {
-;; #pragma omp target teams
-;;   no_parallel_region_in_here();
-;; }
-;;
-;; void simple_state_machine() {
-;; #pragma omp target teams
-;;   {
-;; #pragma omp parallel
-;;     { p0(); }
-;;     no_parallel_region_in_here();
-;; #pragma omp parallel
-;;     { p1(); }
-;;   }
-;; }
-;;
-;; void simple_state_machine_interprocedural_after(void);
-;; void simple_state_machine_interprocedural_before(void) {
-;; #pragma omp parallel
-;;   { p0(); }
-;; }
-;; void simple_state_machine_interprocedural() {
-;; #pragma omp target teams
-;;   {
-;;     simple_state_machine_interprocedural_before();
-;;     no_parallel_region_in_here();
-;; #pragma omp parallel
-;;     { p1(); }
-;;     simple_state_machine_interprocedural_after();
-;;   }
-;; }
-;; void simple_state_machine_interprocedural_after(void) {
-;; #pragma omp parallel
-;;   { p0(); }
-;; }
-;;
-;; void simple_state_machine_with_fallback() {
-;; #pragma omp target teams
-;;   {
-;; #pragma omp parallel
-;;     { p0(); }
-;;     unknown();
-;; #pragma omp parallel
-;;     { p1(); }
-;;   }
-;; }
-;;
-;; void simple_state_machine_no_openmp_attr() {
-;; #pragma omp target teams
-;;   {
-;; #pragma omp parallel
-;;     { p0(); }
-;;     unknown_no_openmp();
-;; #pragma omp parallel
-;;     { p1(); }
-;;   }
-;; }
-;;
-;; void simple_state_machine_pure() {
-;; #pragma omp target teams
-;;   {
-;; #pragma omp parallel
-;;     { p0(); }
-;;     unknown_pure();
-;; #pragma omp parallel
-;;     { p1(); }
-;;   }
-;; }
-;;
-;; int omp_get_thread_num();
-;; void simple_state_machine_interprocedural_nested_recursive_after(int);
-;; void simple_state_machine_interprocedural_nested_recursive_after_after(void);
-;; void simple_state_machine_interprocedural_nested_recursive() {
-;; #pragma omp target teams
-;;   {
-;;     simple_state_machine_interprocedural_nested_recursive_after(
-;;         omp_get_thread_num());
-;;   }
-;; }
-;;
-;; void simple_state_machine_interprocedural_nested_recursive_after(int a) {
-;;   if (a == 0)
-;;     return;
-;;   simple_state_machine_interprocedural_nested_recursive_after(a - 1);
-;;   simple_state_machine_interprocedural_nested_recursive_after_after();
-;; }
-;; void simple_state_machine_interprocedural_nested_recursive_after_after(void) {
-;; #pragma omp parallel
-;;   { p0(); }
-;; }
-;;
-;; __attribute__((weak)) void weak_callee_empty(void) {}
-;; void no_state_machine_weak_callee() {
-;; #pragma omp target teams
-;;   { weak_callee_empty(); }
-;; }
-
-target triple = "nvptx64"
-
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
-
-@"_openmp_kernel_static_glob_rd$ptr" = internal addrspace(3) global i8* undef
- at 0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
- at 1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
- at __omp_offloading_2c_389eb_no_state_machine_needed_l14_exec_mode = weak constant i8 1
- at __omp_offloading_2c_389eb_simple_state_machine_l19_exec_mode = weak constant i8 1
- at __omp_offloading_2c_389eb_simple_state_machine_interprocedural_l35_exec_mode = weak constant i8 1
- at __omp_offloading_2c_389eb_simple_state_machine_with_fallback_l50_exec_mode = weak constant i8 1
- at __omp_offloading_2c_389eb_simple_state_machine_no_openmp_attr_l61_exec_mode = weak constant i8 1
- at __omp_offloading_2c_389eb_simple_state_machine_pure_l72_exec_mode = weak constant i8 1
- at __omp_offloading_2c_389eb_simple_state_machine_interprocedural_nested_recursive_l86_exec_mode = weak constant i8 1
- at __omp_offloading_2c_389eb_no_state_machine_weak_callee_l106_exec_mode = weak constant i8 1
- at 2 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
- at G = external global i32, align 4
- at V = external global i1, align 4
- at 3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 322, i32 2, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
- at llvm.compiler.used = appending global [8 x i8*] [i8* @__omp_offloading_2c_389eb_no_state_machine_needed_l14_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_l19_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_l35_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_with_fallback_l50_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_no_openmp_attr_l61_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_pure_l72_exec_mode, i8* @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_nested_recursive_l86_exec_mode, i8* @__omp_offloading_2c_389eb_no_state_machine_weak_callee_l106_exec_mode], section "llvm.metadata"
-
-; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine.
-; No user code state machine is build because we do not need one.
-define weak void @__omp_offloading_2c_389eb_no_state_machine_needed_l14() #0 {
-entry:
-  %.zero.addr = alloca i32, align 4
-  %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
-  %exec_user_code = icmp eq i32 %0, -1
-  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
-
-user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #2
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
-  ret void
-
-worker.exit:                                      ; preds = %entry
-  ret void
-}
-
-; Verify we will not store a constant true here even though initially all call sites pass `i1 true` for  the second-to-last argument.
-define internal i32 @__kmpc_target_init(%struct.ident_t*, i1, i1 %use_generic_state_machine, i1) {
-  store i1 %use_generic_state_machine, i1* @V
-  %call = call i32 @unknown()
-  ret i32 %call
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  call void @no_parallel_region_in_here() #8
-  ret void
-}
-
-; Function Attrs: convergent nounwind
-define hidden void @no_parallel_region_in_here() #1 {
-entry:
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %1 = call i32 @__kmpc_single(%struct.ident_t* @2, i32 %0)
-  %2 = icmp ne i32 %1, 0
-  br i1 %2, label %omp_if.then, label %omp_if.end
-
-omp_if.then:                                      ; preds = %entry
-  store i32 0, i32* @G, align 4
-  call void @__kmpc_end_single(%struct.ident_t* @2, i32 %0)
-  br label %omp_if.end
-
-omp_if.end:                                       ; preds = %omp_if.then, %entry
-  call void @__kmpc_barrier(%struct.ident_t* @3, i32 %0)
-  ret void
-}
-
-; Function Attrs: nounwind
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #2
-
-declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
-
-; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine.
-; A user code state machine is build because we do need one. No fallback and only one pointer comparison are needed.
-define weak void @__omp_offloading_2c_389eb_simple_state_machine_l19() #0 {
-entry:
-  %.zero.addr = alloca i32, align 4
-  %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
-  %exec_user_code = icmp eq i32 %0, -1
-  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
-
-user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__1(i32* %.threadid_temp., i32* %.zero.addr) #2
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
-  ret void
-
-worker.exit:                                      ; preds = %entry
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %captured_vars_addrs1 = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** %2, i64 0)
-  call void @no_parallel_region_in_here() #8
-  %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** %3, i64 0)
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__2(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  call void @p0() #8
-  ret void
-}
-
-; Function Attrs: convergent
-declare void @p0() #3
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #4 {
-entry:
-  %.addr = alloca i16, align 2
-  %.addr1 = alloca i32, align 4
-  %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__2(i32* %.addr1, i32* %.zero.addr) #2
-  ret void
-}
-
-declare void @__kmpc_get_shared_variables(i8***)
-
-declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__3(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  call void @p1() #8
-  ret void
-}
-
-; Function Attrs: convergent
-declare void @p1() #3
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #4 {
-entry:
-  %.addr = alloca i16, align 2
-  %.addr1 = alloca i32, align 4
-  %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__3(i32* %.addr1, i32* %.zero.addr) #2
-  ret void
-}
-
-; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine.
-; A user code state machine is build because we do need one. No fallback and only two pointer comparison are needed.
-define weak void @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_l35() #0 {
-entry:
-  %.zero.addr = alloca i32, align 4
-  %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
-  %exec_user_code = icmp eq i32 %0, -1
-  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
-
-user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__4(i32* %.threadid_temp., i32* %.zero.addr) #2
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
-  ret void
-
-worker.exit:                                      ; preds = %entry
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  call void @simple_state_machine_interprocedural_before() #8
-  call void @no_parallel_region_in_here() #8
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** %2, i64 0)
-  call void @simple_state_machine_interprocedural_after() #8
-  ret void
-}
-
-; Function Attrs: convergent nounwind
-define hidden void @simple_state_machine_interprocedural_before() #1 {
-entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** %1, i64 0)
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__5(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  call void @p1() #8
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #4 {
-entry:
-  %.addr = alloca i16, align 2
-  %.addr1 = alloca i32, align 4
-  %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__5(i32* %.addr1, i32* %.zero.addr) #2
-  ret void
-}
-
-; Function Attrs: convergent nounwind
-define hidden void @simple_state_machine_interprocedural_after() #1 {
-entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** %1, i64 0)
-  ret void
-}
-
-; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine.
-; A user code state machine is build because we do need one. A fallback indirect call and only two pointer comparison are needed.
-define weak void @__omp_offloading_2c_389eb_simple_state_machine_with_fallback_l50() #0 {
-entry:
-  %.zero.addr = alloca i32, align 4
-  %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
-  %exec_user_code = icmp eq i32 %0, -1
-  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
-
-user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__6(i32* %.threadid_temp., i32* %.zero.addr) #2
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
-  ret void
-
-worker.exit:                                      ; preds = %entry
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %captured_vars_addrs1 = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** %2, i64 0)
-  %3 = call i32 @unknown() #8
-  %4 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** %4, i64 0)
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__7(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  call void @p0() #8
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #4 {
-entry:
-  %.addr = alloca i16, align 2
-  %.addr1 = alloca i32, align 4
-  %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__7(i32* %.addr1, i32* %.zero.addr) #2
-  ret void
-}
-
-; Function Attrs: convergent
-declare i32 @unknown() #3
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__8(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  call void @p1() #8
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__8_wrapper(i16 zeroext %0, i32 %1) #4 {
-entry:
-  %.addr = alloca i16, align 2
-  %.addr1 = alloca i32, align 4
-  %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__8(i32* %.addr1, i32* %.zero.addr) #2
-  ret void
-}
-
-; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine.
-; A user code state machine is build because we do need one. No fallback and only one pointer comparison is needed.
-define weak void @__omp_offloading_2c_389eb_simple_state_machine_no_openmp_attr_l61() #0 {
-entry:
-  %.zero.addr = alloca i32, align 4
-  %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
-  %exec_user_code = icmp eq i32 %0, -1
-  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
-
-user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__9(i32* %.threadid_temp., i32* %.zero.addr) #2
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
-  ret void
-
-worker.exit:                                      ; preds = %entry
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__9(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %captured_vars_addrs1 = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** %2, i64 0)
-  call void @unknown_no_openmp() #9
-  %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** %3, i64 0)
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__10(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  call void @p0() #8
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__10_wrapper(i16 zeroext %0, i32 %1) #4 {
-entry:
-  %.addr = alloca i16, align 2
-  %.addr1 = alloca i32, align 4
-  %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__10(i32* %.addr1, i32* %.zero.addr) #2
-  ret void
-}
-
-; Function Attrs: convergent
-declare void @unknown_no_openmp() #5
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__11(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  call void @p1() #8
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__11_wrapper(i16 zeroext %0, i32 %1) #4 {
-entry:
-  %.addr = alloca i16, align 2
-  %.addr1 = alloca i32, align 4
-  %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__11(i32* %.addr1, i32* %.zero.addr) #2
-  ret void
-}
-
-; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine.
-; A user code state machine is build because we do need one. No fallback and only one pointer comparison is needed.
-define weak void @__omp_offloading_2c_389eb_simple_state_machine_pure_l72() #0 {
-entry:
-  %.zero.addr = alloca i32, align 4
-  %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
-  %exec_user_code = icmp eq i32 %0, -1
-  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
-
-user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__12(i32* %.threadid_temp., i32* %.zero.addr) #2
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
-  ret void
-
-worker.exit:                                      ; preds = %entry
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__12(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %captured_vars_addrs1 = alloca [0 x i8*], align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  %0 = load i32*, i32** %.global_tid..addr, align 8
-  %1 = load i32, i32* %0, align 4
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** %2, i64 0)
-  call void @unknown_pure() #10
-  %3 = bitcast [0 x i8*]* %captured_vars_addrs1 to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %1, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** %3, i64 0)
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__13(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  call void @p0() #8
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__13_wrapper(i16 zeroext %0, i32 %1) #4 {
-entry:
-  %.addr = alloca i16, align 2
-  %.addr1 = alloca i32, align 4
-  %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__13(i32* %.addr1, i32* %.zero.addr) #2
-  ret void
-}
-
-; Function Attrs: convergent nounwind readonly willreturn
-declare void @unknown_pure() #6
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__14(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  call void @p1() #8
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__14_wrapper(i16 zeroext %0, i32 %1) #4 {
-entry:
-  %.addr = alloca i16, align 2
-  %.addr1 = alloca i32, align 4
-  %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__14(i32* %.addr1, i32* %.zero.addr) #2
-  ret void
-}
-
-; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine.
-; A user code state machine is build because we do need one. No fallback and no pointer comparison is needed.
-define weak void @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_nested_recursive_l86() #0 {
-entry:
-  %.zero.addr = alloca i32, align 4
-  %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
-  %exec_user_code = icmp eq i32 %0, -1
-  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
-
-user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__15(i32* %.threadid_temp., i32* %.zero.addr) #2
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
-  ret void
-
-worker.exit:                                      ; preds = %entry
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__15(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  %call = call i32 @omp_get_thread_num()
-  call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %call) #8
-  ret void
-}
-
-; Function Attrs: convergent nounwind
-define hidden void @simple_state_machine_interprocedural_nested_recursive_after(i32 %a) #1 {
-entry:
-  %a.addr = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  %0 = load i32, i32* %a.addr, align 4
-  %cmp = icmp eq i32 %0, 0
-  br i1 %cmp, label %if.then, label %if.end
-
-if.then:                                          ; preds = %entry
-  br label %return
-
-if.end:                                           ; preds = %entry
-  %1 = load i32, i32* %a.addr, align 4
-  %sub = sub nsw i32 %1, 1
-  call void @simple_state_machine_interprocedural_nested_recursive_after(i32 %sub) #8
-  call void @simple_state_machine_interprocedural_nested_recursive_after_after() #8
-  br label %return
-
-return:                                           ; preds = %if.end, %if.then
-  ret void
-}
-
-; Function Attrs: convergent
-declare i32 @omp_get_thread_num() #3
-
-; The second to last argument of __kmpc_target_init is is set to false to indicate we do not need the generic runtime state machine.
-; A pretty generic user code state machine is build because we do not know anything about the weak callee.
-define weak void @__omp_offloading_2c_389eb_no_state_machine_weak_callee_l106() #0 {
-entry:
-  %.zero.addr = alloca i32, align 4
-  %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
-  %exec_user_code = icmp eq i32 %0, -1
-  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
-
-user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__16(i32* %.threadid_temp., i32* %.zero.addr) #2
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
-  ret void
-
-worker.exit:                                      ; preds = %entry
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__16(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  call void @weak_callee_empty() #8
-  ret void
-}
-
-; Function Attrs: convergent nounwind
-define weak hidden void @weak_callee_empty() #1 {
-entry:
-  ret void
-}
-
-; Function Attrs: convergent nounwind
-declare void @__kmpc_end_single(%struct.ident_t*, i32) #7
-
-; Function Attrs: convergent nounwind
-declare i32 @__kmpc_single(%struct.ident_t*, i32) #7
-
-; Function Attrs: convergent nounwind
-declare void @__kmpc_barrier(%struct.ident_t*, i32) #7
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__17(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  call void @p0() #8
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__17_wrapper(i16 zeroext %0, i32 %1) #4 {
-entry:
-  %.addr = alloca i16, align 2
-  %.addr1 = alloca i32, align 4
-  %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__17(i32* %.addr1, i32* %.zero.addr) #2
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__18(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  call void @p0() #8
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__18_wrapper(i16 zeroext %0, i32 %1) #4 {
-entry:
-  %.addr = alloca i16, align 2
-  %.addr1 = alloca i32, align 4
-  %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__18(i32* %.addr1, i32* %.zero.addr) #2
-  ret void
-}
-
-; Function Attrs: convergent nounwind
-define hidden void @simple_state_machine_interprocedural_nested_recursive_after_after() #1 {
-entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @2, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** %1, i64 0)
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__19(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-entry:
-  %.global_tid..addr = alloca i32*, align 8
-  %.bound_tid..addr = alloca i32*, align 8
-  store i32* %.global_tid., i32** %.global_tid..addr, align 8
-  store i32* %.bound_tid., i32** %.bound_tid..addr, align 8
-  call void @p0() #8
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__19_wrapper(i16 zeroext %0, i32 %1) #4 {
-entry:
-  %.addr = alloca i16, align 2
-  %.addr1 = alloca i32, align 4
-  %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i16 %0, i16* %.addr, align 2
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__19(i32* %.addr1, i32* %.zero.addr) #2
-  ret void
-}
-
-attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #1 = { convergent nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #2 = { nounwind }
-attributes #3 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #4 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #5 = { convergent "frame-pointer"="all" "llvm.assume"="omp_no_openmp" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #6 = { convergent nounwind readonly willreturn "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #7 = { convergent nounwind }
-attributes #8 = { convergent }
-attributes #9 = { convergent "llvm.assume"="omp_no_openmp" }
-attributes #10 = { convergent nounwind readonly willreturn }
-
-!omp_offload.info = !{!0, !1, !2, !3, !4, !5, !6, !7}
-!nvvm.annotations = !{!8, !9, !10, !11, !12, !13, !14, !15}
-!llvm.module.flags = !{!16, !17, !18, !20, !21}
-!llvm.ident = !{!19}
-
-!0 = !{i32 0, i32 44, i32 231915, !"simple_state_machine_interprocedural", i32 35, i32 2}
-!1 = !{i32 0, i32 44, i32 231915, !"simple_state_machine_no_openmp_attr", i32 61, i32 4}
-!2 = !{i32 0, i32 44, i32 231915, !"no_state_machine_needed", i32 14, i32 0}
-!3 = !{i32 0, i32 44, i32 231915, !"simple_state_machine_with_fallback", i32 50, i32 3}
-!4 = !{i32 0, i32 44, i32 231915, !"simple_state_machine_pure", i32 72, i32 5}
-!5 = !{i32 0, i32 44, i32 231915, !"simple_state_machine_interprocedural_nested_recursive", i32 86, i32 6}
-!6 = !{i32 0, i32 44, i32 231915, !"no_state_machine_weak_callee", i32 106, i32 7}
-!7 = !{i32 0, i32 44, i32 231915, !"simple_state_machine", i32 19, i32 1}
-!8 = !{void ()* @__omp_offloading_2c_389eb_no_state_machine_needed_l14, !"kernel", i32 1}
-!9 = !{void ()* @__omp_offloading_2c_389eb_simple_state_machine_l19, !"kernel", i32 1}
-!10 = !{void ()* @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_l35, !"kernel", i32 1}
-!11 = !{void ()* @__omp_offloading_2c_389eb_simple_state_machine_with_fallback_l50, !"kernel", i32 1}
-!12 = !{void ()* @__omp_offloading_2c_389eb_simple_state_machine_no_openmp_attr_l61, !"kernel", i32 1}
-!13 = !{void ()* @__omp_offloading_2c_389eb_simple_state_machine_pure_l72, !"kernel", i32 1}
-!14 = !{void ()* @__omp_offloading_2c_389eb_simple_state_machine_interprocedural_nested_recursive_l86, !"kernel", i32 1}
-!15 = !{void ()* @__omp_offloading_2c_389eb_no_state_machine_weak_callee_l106, !"kernel", i32 1}
-!16 = !{i32 1, !"wchar_size", i32 4}
-!17 = !{i32 7, !"PIC Level", i32 2}
-!18 = !{i32 7, !"frame-pointer", i32 2}
-!19 = !{!"clang version 13.0.0"}
-!20 = !{i32 7, !"openmp", i32 50}
-!21 = !{i32 7, !"openmp-device", i32 50}
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_no_state_machine_needed_l14
-; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1:[0-9]+]], i1 noundef false, i1 noundef false, i1 noundef true)
-; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; CHECK:       user_code.entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; CHECK-NEXT:    call void @__omp_outlined__(i32* noundef nonnull align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
-; CHECK-NEXT:    ret void
-; CHECK:       worker.exit:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK-LABEL: define {{[^@]+}}@__kmpc_target_init
-; CHECK-SAME: (%struct.ident_t* noalias nocapture nofree nonnull readnone align 8 dereferenceable(24) [[TMP0:%.*]], i1 [[TMP1:%.*]], i1 [[USE_GENERIC_STATE_MACHINE:%.*]], i1 [[TMP2:%.*]]) {
-; CHECK-NEXT:    store i1 false, i1* @V, align 4
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @unknown()
-; CHECK-NEXT:    ret i32 [[CALL]]
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
-; CHECK-SAME: (i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR7:[0-9]+]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent nounwind
-; CHECK-LABEL: define {{[^@]+}}@no_parallel_region_in_here.internalized
-; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2:[0-9]+]]) #[[ATTR2]]
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR2]]
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
-; CHECK:       omp_if.then:
-; CHECK-NEXT:    store i32 0, i32* @G, align 4
-; CHECK-NEXT:    call void @__kmpc_end_single(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]]) #[[ATTR2]]
-; CHECK-NEXT:    br label [[OMP_IF_END]]
-; CHECK:       omp_if.end:
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* noundef @[[GLOB3:[0-9]+]], i32 [[TMP0]]) #[[ATTR2]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent nounwind
-; CHECK-LABEL: define {{[^@]+}}@no_parallel_region_in_here
-; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT:    br i1 [[TMP2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
-; CHECK:       omp_if.then:
-; CHECK-NEXT:    store i32 0, i32* @G, align 4
-; CHECK-NEXT:    call void @__kmpc_end_single(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
-; CHECK-NEXT:    br label [[OMP_IF_END]]
-; CHECK:       omp_if.end:
-; CHECK-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]])
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_l19
-; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef false, i1 noundef true)
-; CHECK-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; CHECK-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; CHECK:       worker_state_machine.begin:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; CHECK-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; CHECK-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; CHECK-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; CHECK-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
-; CHECK-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; CHECK:       worker_state_machine.finished:
-; CHECK-NEXT:    ret void
-; CHECK:       worker_state_machine.is_active.check:
-; CHECK-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; CHECK:       worker_state_machine.parallel_region.check:
-; CHECK-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__2_wrapper
-; CHECK-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
-; CHECK:       worker_state_machine.parallel_region.execute:
-; CHECK-NEXT:    call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; CHECK:       worker_state_machine.parallel_region.check1:
-; CHECK-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
-; CHECK:       worker_state_machine.parallel_region.execute2:
-; CHECK-NEXT:    call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; CHECK:       worker_state_machine.parallel_region.check3:
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; CHECK:       worker_state_machine.parallel_region.end:
-; CHECK-NEXT:    call void @__kmpc_kernel_end_parallel()
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; CHECK:       worker_state_machine.done.barrier:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
-; CHECK:       thread.user_code.check:
-; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; CHECK:       user_code.entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]]
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; CHECK-NEXT:    call void @__omp_outlined__1(i32* noundef nonnull align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
-; CHECK-NEXT:    ret void
-; CHECK:       worker.exit:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1
-; CHECK-SAME: (i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
-; CHECK-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR7]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2
-; CHECK-SAME: (i32* noalias nofree [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    call void @p0() #[[ATTR8:[0-9]+]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper
-; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-; CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; CHECK-NEXT:    call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__3
-; CHECK-SAME: (i32* noalias nofree [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    call void @p1() #[[ATTR8]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
-; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-; CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; CHECK-NEXT:    call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_interprocedural_l35
-; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef false, i1 noundef true)
-; CHECK-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; CHECK-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; CHECK:       worker_state_machine.begin:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; CHECK-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; CHECK-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; CHECK-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; CHECK-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
-; CHECK-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; CHECK:       worker_state_machine.finished:
-; CHECK-NEXT:    ret void
-; CHECK:       worker_state_machine.is_active.check:
-; CHECK-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; CHECK:       worker_state_machine.parallel_region.check:
-; CHECK-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__17_wrapper
-; CHECK-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
-; CHECK:       worker_state_machine.parallel_region.execute:
-; CHECK-NEXT:    call void @__omp_outlined__17_wrapper(i16 0, i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; CHECK:       worker_state_machine.parallel_region.check1:
-; CHECK-NEXT:    [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__5_wrapper
-; CHECK-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
-; CHECK:       worker_state_machine.parallel_region.execute2:
-; CHECK-NEXT:    call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; CHECK:       worker_state_machine.parallel_region.check3:
-; CHECK-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE5:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK6:%.*]]
-; CHECK:       worker_state_machine.parallel_region.execute5:
-; CHECK-NEXT:    call void @__omp_outlined__18_wrapper(i16 0, i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; CHECK:       worker_state_machine.parallel_region.check6:
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; CHECK:       worker_state_machine.parallel_region.end:
-; CHECK-NEXT:    call void @__kmpc_kernel_end_parallel()
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; CHECK:       worker_state_machine.done.barrier:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
-; CHECK:       thread.user_code.check:
-; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; CHECK:       user_code.entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]]
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; CHECK-NEXT:    call void @__omp_outlined__4(i32* noundef nonnull align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
-; CHECK-NEXT:    ret void
-; CHECK:       worker.exit:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__4
-; CHECK-SAME: (i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR7]]
-; CHECK-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR7]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
-; CHECK-NEXT:    call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR7]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent nounwind
-; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR2]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent nounwind
-; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before
-; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__17 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__17_wrapper to i8*), i8** [[TMP1]], i64 0)
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__5
-; CHECK-SAME: (i32* noalias nofree [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    call void @p1() #[[ATTR8]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper
-; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-; CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; CHECK-NEXT:    call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent nounwind
-; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR2]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent nounwind
-; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after
-; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__18 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__18_wrapper to i8*), i8** [[TMP1]], i64 0)
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_with_fallback_l50
-; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef false, i1 noundef true)
-; CHECK-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; CHECK-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; CHECK:       worker_state_machine.begin:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; CHECK-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; CHECK-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; CHECK-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; CHECK-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
-; CHECK-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; CHECK:       worker_state_machine.finished:
-; CHECK-NEXT:    ret void
-; CHECK:       worker_state_machine.is_active.check:
-; CHECK-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; CHECK:       worker_state_machine.parallel_region.check:
-; CHECK-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__7_wrapper
-; CHECK-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
-; CHECK:       worker_state_machine.parallel_region.execute:
-; CHECK-NEXT:    call void @__omp_outlined__7_wrapper(i16 0, i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; CHECK:       worker_state_machine.parallel_region.check1:
-; CHECK-NEXT:    [[WORKER_CHECK_PARALLEL_REGION4:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__8_wrapper
-; CHECK-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION4]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]]
-; CHECK:       worker_state_machine.parallel_region.execute2:
-; CHECK-NEXT:    call void @__omp_outlined__8_wrapper(i16 0, i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; CHECK:       worker_state_machine.parallel_region.fallback.execute:
-; CHECK-NEXT:    call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; CHECK:       worker_state_machine.parallel_region.end:
-; CHECK-NEXT:    call void @__kmpc_kernel_end_parallel()
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; CHECK:       worker_state_machine.done.barrier:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
-; CHECK:       thread.user_code.check:
-; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; CHECK:       user_code.entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]]
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; CHECK-NEXT:    call void @__omp_outlined__6(i32* noundef nonnull align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
-; CHECK-NEXT:    ret void
-; CHECK:       worker.exit:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__6
-; CHECK-SAME: (i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @unknown() #[[ATTR8]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP4]], i64 noundef 0)
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__7
-; CHECK-SAME: (i32* noalias nofree [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    call void @p0() #[[ATTR8]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper
-; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-; CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; CHECK-NEXT:    call void @__omp_outlined__7(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__8
-; CHECK-SAME: (i32* noalias nofree [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    call void @p1() #[[ATTR8]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__8_wrapper
-; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-; CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; CHECK-NEXT:    call void @__omp_outlined__8(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_no_openmp_attr_l61
-; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef false, i1 noundef true)
-; CHECK-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; CHECK-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; CHECK:       worker_state_machine.begin:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; CHECK-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; CHECK-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; CHECK-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; CHECK-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
-; CHECK-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; CHECK:       worker_state_machine.finished:
-; CHECK-NEXT:    ret void
-; CHECK:       worker_state_machine.is_active.check:
-; CHECK-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; CHECK:       worker_state_machine.parallel_region.check:
-; CHECK-NEXT:    [[WORKER_CHECK_PARALLEL_REGION:%.*]] = icmp eq void (i16, i32)* [[WORKER_WORK_FN_ADDR_CAST]], @__omp_outlined__10_wrapper
-; CHECK-NEXT:    br i1 [[WORKER_CHECK_PARALLEL_REGION]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
-; CHECK:       worker_state_machine.parallel_region.execute:
-; CHECK-NEXT:    call void @__omp_outlined__10_wrapper(i16 0, i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; CHECK:       worker_state_machine.parallel_region.check1:
-; CHECK-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE2:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK3:%.*]]
-; CHECK:       worker_state_machine.parallel_region.execute2:
-; CHECK-NEXT:    call void @__omp_outlined__11_wrapper(i16 0, i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; CHECK:       worker_state_machine.parallel_region.check3:
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; CHECK:       worker_state_machine.parallel_region.end:
-; CHECK-NEXT:    call void @__kmpc_kernel_end_parallel()
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; CHECK:       worker_state_machine.done.barrier:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
-; CHECK:       thread.user_code.check:
-; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; CHECK:       user_code.entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]]
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; CHECK-NEXT:    call void @__omp_outlined__9(i32* noundef nonnull align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
-; CHECK-NEXT:    ret void
-; CHECK:       worker.exit:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__9
-; CHECK-SAME: (i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
-; CHECK-NEXT:    call void @unknown_no_openmp() #[[ATTR9:[0-9]+]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__10
-; CHECK-SAME: (i32* noalias nofree [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    call void @p0() #[[ATTR8]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__10_wrapper
-; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-; CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; CHECK-NEXT:    call void @__omp_outlined__10(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__11
-; CHECK-SAME: (i32* noalias nofree [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    call void @p1() #[[ATTR8]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__11_wrapper
-; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-; CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; CHECK-NEXT:    call void @__omp_outlined__11(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_pure_l72
-; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef true, i1 noundef false, i1 noundef true)
-; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; CHECK:       user_code.entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]]
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; CHECK-NEXT:    call void @__omp_outlined__12(i32* noundef nonnull align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true)
-; CHECK-NEXT:    ret void
-; CHECK:       worker.exit:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__12
-; CHECK-SAME: (i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__13
-; CHECK-SAME: (i32* noalias nofree [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    call void @p0() #[[ATTR8]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__13_wrapper
-; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-; CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; CHECK-NEXT:    call void @__omp_outlined__13(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__14
-; CHECK-SAME: (i32* noalias nofree [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    call void @p1() #[[ATTR8]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__14_wrapper
-; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-; CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; CHECK-NEXT:    call void @__omp_outlined__14(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_simple_state_machine_interprocedural_nested_recursive_l86
-; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef false, i1 noundef true)
-; CHECK-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; CHECK-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; CHECK:       worker_state_machine.begin:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; CHECK-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; CHECK-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; CHECK-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; CHECK-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
-; CHECK-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; CHECK:       worker_state_machine.finished:
-; CHECK-NEXT:    ret void
-; CHECK:       worker_state_machine.is_active.check:
-; CHECK-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; CHECK:       worker_state_machine.parallel_region.check:
-; CHECK-NEXT:    br i1 true, label [[WORKER_STATE_MACHINE_PARALLEL_REGION_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_CHECK1:%.*]]
-; CHECK:       worker_state_machine.parallel_region.execute:
-; CHECK-NEXT:    call void @__omp_outlined__19_wrapper(i16 0, i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; CHECK:       worker_state_machine.parallel_region.check1:
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END]]
-; CHECK:       worker_state_machine.parallel_region.end:
-; CHECK-NEXT:    call void @__kmpc_kernel_end_parallel()
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; CHECK:       worker_state_machine.done.barrier:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
-; CHECK:       thread.user_code.check:
-; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; CHECK:       user_code.entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]]
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; CHECK-NEXT:    call void @__omp_outlined__15(i32* noundef nonnull align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
-; CHECK-NEXT:    ret void
-; CHECK:       worker.exit:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__15
-; CHECK-SAME: (i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR2]]
-; CHECK-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR7]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent nounwind
-; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
-; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    br label [[RETURN:%.*]]
-; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; CHECK-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR7]]
-; CHECK-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR7]]
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent nounwind
-; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after
-; CHECK-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
-; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    br label [[RETURN:%.*]]
-; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; CHECK-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
-; CHECK-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
-; CHECK-NEXT:    br label [[RETURN]]
-; CHECK:       return:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_389eb_no_state_machine_weak_callee_l106
-; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[WORKER_WORK_FN_ADDR:%.*]] = alloca i8*, align 8
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* noalias noundef nonnull readnone align 8 dereferenceable(24) @[[GLOB1]], i1 noundef false, i1 noundef false, i1 noundef true)
-; CHECK-NEXT:    [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
-; CHECK-NEXT:    br i1 [[THREAD_IS_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
-; CHECK:       worker_state_machine.begin:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; CHECK-NEXT:    [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORKER_WORK_FN_ADDR]])
-; CHECK-NEXT:    [[WORKER_WORK_FN:%.*]] = load i8*, i8** [[WORKER_WORK_FN_ADDR]], align 8
-; CHECK-NEXT:    [[WORKER_WORK_FN_ADDR_CAST:%.*]] = bitcast i8* [[WORKER_WORK_FN]] to void (i16, i32)*
-; CHECK-NEXT:    [[WORKER_IS_DONE:%.*]] = icmp eq i8* [[WORKER_WORK_FN]], null
-; CHECK-NEXT:    br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
-; CHECK:       worker_state_machine.finished:
-; CHECK-NEXT:    ret void
-; CHECK:       worker_state_machine.is_active.check:
-; CHECK-NEXT:    br i1 [[WORKER_IS_ACTIVE]], label [[WORKER_STATE_MACHINE_PARALLEL_REGION_FALLBACK_EXECUTE:%.*]], label [[WORKER_STATE_MACHINE_DONE_BARRIER:%.*]]
-; CHECK:       worker_state_machine.parallel_region.fallback.execute:
-; CHECK-NEXT:    call void [[WORKER_WORK_FN_ADDR_CAST]](i16 0, i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_PARALLEL_REGION_END:%.*]]
-; CHECK:       worker_state_machine.parallel_region.end:
-; CHECK-NEXT:    call void @__kmpc_kernel_end_parallel()
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_DONE_BARRIER]]
-; CHECK:       worker_state_machine.done.barrier:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
-; CHECK-NEXT:    br label [[WORKER_STATE_MACHINE_BEGIN]]
-; CHECK:       thread.user_code.check:
-; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; CHECK:       user_code.entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2]]
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; CHECK-NEXT:    call void @__omp_outlined__16(i32* noundef nonnull align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noundef nonnull align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 false, i1 true)
-; CHECK-NEXT:    ret void
-; CHECK:       worker.exit:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__16
-; CHECK-SAME: (i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    call void @weak_callee_empty() #[[ATTR7]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent nounwind
-; CHECK-LABEL: define {{[^@]+}}@weak_callee_empty
-; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__17
-; CHECK-SAME: (i32* noalias nofree [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    call void @p0() #[[ATTR8]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__17_wrapper
-; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-; CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; CHECK-NEXT:    call void @__omp_outlined__17(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__18
-; CHECK-SAME: (i32* noalias nofree [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    call void @p0() #[[ATTR8]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__18_wrapper
-; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-; CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; CHECK-NEXT:    call void @__omp_outlined__18(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent nounwind
-; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef @[[GLOB2]]) #[[ATTR2]]
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB2]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent nounwind
-; CHECK-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after
-; CHECK-SAME: () #[[ATTR1]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__19 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__19_wrapper to i8*), i8** [[TMP1]], i64 0)
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__19
-; CHECK-SAME: (i32* noalias nofree [[DOTGLOBAL_TID_:%.*]], i32* noalias nofree [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
-; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    call void @p0() #[[ATTR8]]
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: Function Attrs: convergent norecurse nounwind
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__19_wrapper
-; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
-; CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    store i16 [[TMP0]], i16* [[DOTADDR]], align 2
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; CHECK-NEXT:    call void @__omp_outlined__19(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    ret void
-;

diff  --git a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
deleted file mode 100644
index 86d7990a4b95..000000000000
--- a/llvm/test/Transforms/OpenMP/custom_state_machines_remarks.ll
+++ /dev/null
@@ -1,234 +0,0 @@
-; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -pass-remarks-analysis=openmp-opt -disable-output < %s 2>&1 | FileCheck %s
-target triple = "nvptx64"
-
-; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:11:1: Generic-mode kernel is executed with a customized state machine that requires a fallback [1 known parallel regions, 2 unkown parallel regions] (bad)
-; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:13:5: State machine fallback caused by this call. If it is a false positive, use `__attribute__((assume("omp_no_openmp"))` (or "omp_no_parallelism")
-; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:15:5: State machine fallback caused by this call. If it is a false positive, use `__attribute__((assume("omp_no_openmp"))` (or "omp_no_parallelism")
-; CHECK: remark: llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c:20:1: Generic-mode kernel is executed with a customized state machine [1 known parallel regions] (good)
-
-;; void unknown(void);
-;; void known(void) {
-;;   #pragma omp parallel
-;;   {
-;;     unknown();
-;;   }
-;; }
-;; 
-;; void test_fallback(void) {
-;;   #pragma omp target teams
-;;   {
-;;     unknown();
-;;     known();
-;;     unknown();
-;;   }
-;; }
-;;
-;; void no_openmp(void) __attribute__((assume("omp_no_openmp")));
-;; void test_no_fallback(void) {
-;;   #pragma omp target teams
-;;   {
-;;     known();
-;;     known();
-;;     known();
-;;     no_openmp();     // make it non-spmd
-;;   }
-;; }
-
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
-
- at 0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
- at 1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @0, i32 0, i32 0) }, align 8
- at 2 = private unnamed_addr constant [82 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_fallback;11;1;;\00", align 1
- at 3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([82 x i8], [82 x i8]* @2, i32 0, i32 0) }, align 8
- at 4 = private unnamed_addr constant [114 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1
- at 5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([114 x i8], [114 x i8]* @4, i32 0, i32 0) }, align 8
- at __omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1
- at 6 = private unnamed_addr constant [116 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1
- at 7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([116 x i8], [116 x i8]* @6, i32 0, i32 0) }, align 8
- at 8 = private unnamed_addr constant [85 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;test_no_fallback;20;1;;\00", align 1
- at 9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([85 x i8], [85 x i8]* @8, i32 0, i32 0) }, align 8
- at 10 = private unnamed_addr constant [117 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1
- at 11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([117 x i8], [117 x i8]* @10, i32 0, i32 0) }, align 8
- at __omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1
- at 12 = private unnamed_addr constant [73 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;known;4;1;;\00", align 1
- at 13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([73 x i8], [73 x i8]* @12, i32 0, i32 0) }, align 8
- at G = external global i32
- at llvm.compiler.used = appending global [2 x i8*] [i8* @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, i8* @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata"
-
-; Function Attrs: convergent norecurse nounwind
-define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
-entry:
-  %captured_vars_addrs.i.i = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true) #3, !dbg !18
-  %exec_user_code = icmp eq i32 %0, -1, !dbg !18
-  br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18
-
-common.ret:                                       ; preds = %entry, %user_code.entry
-  ret void, !dbg !19
-
-user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @3) #3
-  call void @unknown() #6, !dbg !20
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs.i.i to i8*
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i.i, i64 0, i64 0, !dbg !23
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26
-  call void @unknown() #6, !dbg !27
-  call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i1 false, i1 true) #3, !dbg !28
-  br label %common.ret
-}
-
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) local_unnamed_addr
-
-; Function Attrs: convergent
-declare void @unknown() local_unnamed_addr #1
-
-; Function Attrs: nounwind
-define hidden void @known() local_unnamed_addr #2 !dbg !29 {
-entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @13)
-  %1 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0, !dbg !30
-  call void @__kmpc_parallel_51(%struct.ident_t* nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** nonnull %1, i64 0) #3, !dbg !30
-  ret void, !dbg !31
-}
-
-; Function Attrs: nounwind
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3
-
-declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) local_unnamed_addr
-
-; Function Attrs: norecurse nounwind
-define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
-entry:
-  %captured_vars_addrs.i2.i = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i1 false, i1 true, i1 true) #3, !dbg !33
-  %exec_user_code = icmp eq i32 %0, -1, !dbg !33
-  br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33
-
-common.ret:                                       ; preds = %entry, %user_code.entry
-  ret void, !dbg !34
-
-user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @9) #3
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs.i2.i to i8*
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i2.i, i64 0, i64 0, !dbg !35
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !35
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !39
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %5, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !40
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !42
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %6 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %6, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !43
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45
-  call void @no_openmp()
-  call void @no_parallelism()
-  call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i1 false, i1 true) #3, !dbg !46
-  br label %common.ret
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__2(i32* noalias nocapture nofree readnone %.global_tid., i32* noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 {
-entry:
-  call void @unknown() #6, !dbg !48
-  ret void, !dbg !49
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 !dbg !50 {
-entry:
-  %global_args = alloca i8**, align 8
-  call void @__kmpc_get_shared_variables(i8*** nonnull %global_args) #3, !dbg !51
-  call void @unknown() #6, !dbg !52
-  ret void, !dbg !51
-}
-
-declare void @__kmpc_get_shared_variables(i8***) local_unnamed_addr
-
-declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr
-
-; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5
-
-; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5
-
-declare void @no_openmp() #7
-declare void @no_parallelism() #8
-
-attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #2 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #3 = { nounwind }
-attributes #4 = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #5 = { argmemonly nofree nosync nounwind willreturn }
-attributes #6 = { convergent nounwind }
-attributes #7 = { "llvm.assume"="omp_no_openmp" }
-attributes #8 = { "llvm.assume"="omp_no_parallelism" }
-
-!llvm.dbg.cu = !{!0}
-!omp_offload.info = !{!3, !4}
-!nvvm.annotations = !{!5, !6}
-!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
-!llvm.ident = !{!14}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: DebugDirectivesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None)
-!1 = !DIFile(filename: "custom_state_machines_remarks.c", directory: "/data/src/llvm-project")
-!2 = !{}
-!3 = !{i32 0, i32 42, i32 14159165, !"test_no_fallback", i32 20, i32 1}
-!4 = !{i32 0, i32 42, i32 14159165, !"test_fallback", i32 11, i32 0}
-!5 = !{void ()* @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1}
-!6 = !{void ()* @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1}
-!7 = !{i32 7, !"Dwarf Version", i32 2}
-!8 = !{i32 2, !"Debug Info Version", i32 3}
-!9 = !{i32 1, !"wchar_size", i32 4}
-!10 = !{i32 7, !"openmp", i32 50}
-!11 = !{i32 7, !"openmp-device", i32 50}
-!12 = !{i32 7, !"PIC Level", i32 2}
-!13 = !{i32 7, !"frame-pointer", i32 2}
-!14 = !{!"clang version 13.0.0"}
-!15 = distinct !DISubprogram(name: "__omp_offloading_2a_d80d3d_test_fallback_l11", scope: !16, file: !16, line: 11, type: !17, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!16 = !DIFile(filename: "llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c", directory: "/data/src/llvm-project")
-!17 = !DISubroutineType(types: !2)
-!18 = !DILocation(line: 11, column: 1, scope: !15)
-!19 = !DILocation(line: 0, scope: !15)
-!20 = !DILocation(line: 13, column: 5, scope: !21, inlinedAt: !22)
-!21 = distinct !DISubprogram(name: "__omp_outlined__", scope: !16, file: !16, line: 11, type: !17, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!22 = distinct !DILocation(line: 11, column: 1, scope: !15)
-!23 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !25)
-!24 = distinct !DISubprogram(name: "known", scope: !16, file: !16, line: 3, type: !17, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!25 = distinct !DILocation(line: 14, column: 5, scope: !21, inlinedAt: !22)
-!26 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !25)
-!27 = !DILocation(line: 15, column: 5, scope: !21, inlinedAt: !22)
-!28 = !DILocation(line: 11, column: 25, scope: !15)
-!29 = distinct !DISubprogram(name: "known", scope: !16, file: !16, line: 3, type: !17, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!30 = !DILocation(line: 4, column: 1, scope: !29)
-!31 = !DILocation(line: 8, column: 1, scope: !29)
-!32 = distinct !DISubprogram(name: "__omp_offloading_2a_d80d3d_test_no_fallback_l20", scope: !16, file: !16, line: 20, type: !17, scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!33 = !DILocation(line: 20, column: 1, scope: !32)
-!34 = !DILocation(line: 0, scope: !32)
-!35 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !36)
-!36 = distinct !DILocation(line: 22, column: 5, scope: !37, inlinedAt: !38)
-!37 = distinct !DISubprogram(name: "__omp_outlined__1", scope: !16, file: !16, line: 20, type: !17, scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!38 = distinct !DILocation(line: 20, column: 1, scope: !32)
-!39 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !36)
-!40 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !41)
-!41 = distinct !DILocation(line: 23, column: 5, scope: !37, inlinedAt: !38)
-!42 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !41)
-!43 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !44)
-!44 = distinct !DILocation(line: 24, column: 5, scope: !37, inlinedAt: !38)
-!45 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !44)
-!46 = !DILocation(line: 20, column: 25, scope: !32)
-!47 = distinct !DISubprogram(name: "__omp_outlined__2", scope: !16, file: !16, line: 4, type: !17, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!48 = !DILocation(line: 6, column: 5, scope: !47)
-!49 = !DILocation(line: 7, column: 3, scope: !47)
-!50 = distinct !DISubprogram(linkageName: "__omp_outlined__2_wrapper", scope: !16, file: !16, line: 4, type: !17, scopeLine: 4, flags: DIFlagArtificial, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!51 = !DILocation(line: 4, column: 1, scope: !50)
-!52 = !DILocation(line: 6, column: 5, scope: !47, inlinedAt: !53)
-!53 = distinct !DILocation(line: 4, column: 1, scope: !50)

diff  --git a/llvm/test/Transforms/OpenMP/globalization_remarks.ll b/llvm/test/Transforms/OpenMP/globalization_remarks.ll
index 85787c96a130..a57ae974abb2 100644
--- a/llvm/test/Transforms/OpenMP/globalization_remarks.ll
+++ b/llvm/test/Transforms/OpenMP/globalization_remarks.ll
@@ -7,19 +7,15 @@ target triple = "nvptx64"
 ; CHECK: remark: globalization_remarks.c:5:7: Could not move globalized variable to the stack. Variable is potentially captured.
 ; CHECK: remark: globalization_remarks.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
-
 @S = external local_unnamed_addr global i8*
 
 define void @foo() {
 entry:
-  %c = call i32 @__kmpc_target_init(%struct.ident_t* null, i1 false, i1 true, i1 true)
   %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !10
   %x_on_stack = bitcast i8* %0 to i32*
   %1 = bitcast i32* %x_on_stack to i8*
   call void @share(i8* %1)
   call void @__kmpc_free_shared(i8* %0)
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true)
   ret void
 }
 
@@ -33,8 +29,6 @@ declare i8* @__kmpc_alloc_shared(i64)
 
 declare void @__kmpc_free_shared(i8*)
 
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1);
-declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5, !6}

diff  --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index 1c933f92339a..0635ec6b7813 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -7,28 +7,19 @@ target triple = "nvptx64"
 ; CHECK-REMARKS: remark: remove_globalization.c:4:2: Could not move globalized variable to the stack. Variable is potentially captured. Mark as noescape to override.
 ; CHECK-REMARKS: remark: remove_globalization.c:2:2: Moving globalized variable to the stack.
 ; CHECK-REMARKS: remark: remove_globalization.c:6:2: Moving globalized variable to the stack.
-; CHECK-REMARKS: remark: remove_globalization.c:4:2: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.
 
 @S = external local_unnamed_addr global i8*
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
-
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
-declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
-
 define void @kernel() {
 ; CHECK-LABEL: define {{[^@]+}}@kernel() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i1 false, i1 false, i1 true)
 ; CHECK-NEXT:    call void @foo() #[[ATTR0:[0-9]+]]
 ; CHECK-NEXT:    call void @bar() #[[ATTR0]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i1 false, i1 true)
 ; CHECK-NEXT:    ret void
+;
 entry:
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i1 false, i1 true, i1 true)
   call void @foo()
   call void @bar()
-  call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i1 false, i1 true)
   ret void
 }
 
@@ -50,8 +41,8 @@ define internal void @bar() {
 ; CHECK-LABEL: define {{[^@]+}}@bar
 ; CHECK-SAME: () #[[ATTR0]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR0]], !dbg [[DBG8:![0-9]+]]
-; CHECK-NEXT:    call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR0]]
+; CHECK-NEXT:    call void @share(i8* nofree writeonly [[TMP0]]) #[[ATTR2:[0-9]+]]
 ; CHECK-NEXT:    call void @__kmpc_free_shared(i8* [[TMP0]]) #[[ATTR0]]
 ; CHECK-NEXT:    ret void
 ;
@@ -63,18 +54,13 @@ entry:
 }
 
 define internal void @use(i8* %x) {
-; CHECK-LABEL: define {{[^@]+}}@use
-; CHECK-SAME: (i8* noalias nocapture nofree readnone [[X:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    ret void
-;
 entry:
   ret void
 }
 
 define internal void @share(i8* %x) {
 ; CHECK-LABEL: define {{[^@]+}}@share
-; CHECK-SAME: (i8* nofree writeonly [[X:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-SAME: (i8* nofree writeonly [[X:%.*]]) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i8* [[X]], i8** @S, align 8
 ; CHECK-NEXT:    ret void
@@ -85,12 +71,6 @@ entry:
 }
 
 define void @unused() {
-; CHECK-LABEL: define {{[^@]+}}@unused() {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = alloca i8, i64 4, align 1
-; CHECK-NEXT:    call void @use(i8* noalias readnone undef)
-; CHECK-NEXT:    ret void
-;
 entry:
   %0 = call i8* @__kmpc_alloc_shared(i64 4), !dbg !14
   call void @use(i8* %0)

diff  --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll
index 9229f673cac7..c0021bda65a6 100644
--- a/llvm/test/Transforms/OpenMP/replace_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll
@@ -3,16 +3,10 @@
 target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64"
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
-
 @S = external local_unnamed_addr global i8*
- at 0 = private unnamed_addr constant [113 x i8] c";llvm/test/Transforms/OpenMP/custom_state_machines_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
- at 1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @0, i32 0, i32 0) }, align 8
 
 ; CHECK-REMARKS: remark: replace_globalization.c:5:7: Replaced globalized variable with 16 bytes of shared memory
 ; CHECK-REMARKS: remark: replace_globalization.c:5:14: Replaced globalized variable with 4 bytes of shared memory
-; CHECK-REMARKS-NOT: 6 bytes
-
 ; CHECK: [[SHARED_X:@.+]] = internal addrspace(3) global [16 x i8] undef
 ; CHECK: [[SHARED_Y:@.+]] = internal addrspace(3) global [4 x i8] undef
 
@@ -20,30 +14,25 @@ target triple = "nvptx64"
 ; CHECK: call void @__kmpc_free_shared({{.*}})
 define dso_local void @foo() {
 entry:
-  %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
   %x = call i8* @__kmpc_alloc_shared(i64 4)
   %x_on_stack = bitcast i8* %x to i32*
   %0 = bitcast i32* %x_on_stack to i8*
   call void @use(i8* %0)
   call void @__kmpc_free_shared(i8* %x)
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
   ret void
 }
 
 define void @bar() {
-  %c = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
   call void @baz()
   call void @qux()
-  call void @negative_qux_spmd()
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
   ret void
 }
 
-; CHECK: call void @use.internalized(i8* nofree writeonly addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* [[SHARED_X]], i32 0, i32 0) to i8*))
+; CHECK: %{{.*}} = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* [[SHARED_X]], i32 0, i32 0) to i8*) to [4 x i32]*
 define internal void @baz() {
 entry:
-  %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 false, i1 true)
-  %cmp = icmp eq i32 %call, -1
+  %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %cmp = icmp eq i32 %tid, 0
   br i1 %cmp, label %master, label %exit
 master:
   %x = call i8* @__kmpc_alloc_shared(i64 16), !dbg !11
@@ -56,33 +45,23 @@ exit:
   ret void
 }
 
-; CHECK: call void @use.internalized(i8* nofree writeonly addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* [[SHARED_Y]], i32 0, i32 0) to i8*))
+; CHECK: %{{.*}} = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* [[SHARED_Y]], i32 0, i32 0) to i8*) to [4 x i32]*
 define internal void @qux() {
 entry:
-  %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true)
-  %0 = icmp eq i32 %call, -1
-  br i1 %0, label %master, label %exit
+  %tid = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %ntid = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
+  %warpsize = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
+  %0 = sub nuw i32 %warpsize, 1
+  %1 = sub nuw i32 %ntid, 1
+  %2 = xor i32 %0, -1
+  %master_tid = and i32 %1, %2
+  %3 = icmp eq i32 %tid, %master_tid
+  br i1 %3, label %master, label %exit
 master:
   %y = call i8* @__kmpc_alloc_shared(i64 4), !dbg !12
   %y_on_stack = bitcast i8* %y to [4 x i32]*
-  %1 = bitcast [4 x i32]* %y_on_stack to i8*
-  call void @use(i8* %1)
-  call void @__kmpc_free_shared(i8* %y)
-  br label %exit
-exit:
-  ret void
-}
-
-define internal void @negative_qux_spmd() {
-entry:
-  %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 true, i1 true, i1 true)
-  %0 = icmp eq i32 %call, -1
-  br i1 %0, label %master, label %exit
-master:
-  %y = call i8* @__kmpc_alloc_shared(i64 6), !dbg !12
-  %y_on_stack = bitcast i8* %y to [6 x i32]*
-  %1 = bitcast [6 x i32]* %y_on_stack to i8*
-  call void @use(i8* %1)
+  %4 = bitcast [4 x i32]* %y_on_stack to i8*
+  call void @use(i8* %4)
   call void @__kmpc_free_shared(i8* %y)
   br label %exit
 exit:
@@ -106,9 +85,6 @@ declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 
 declare i32 @llvm.nvvm.read.ptx.sreg.warpsize()
 
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
-
-declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!3, !4, !5, !6}

diff  --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
index 834d6368f755..5fff563d364d 100644
--- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
+++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll
@@ -3,41 +3,25 @@
 ; REQUIRES: asserts
 ; ModuleID = 'single_threaded_exeuction.c'
 
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
-
- at 0 = private unnamed_addr constant [1 x i8] c"\00", align 1
- at 1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @0, i32 0, i32 0) }, align 8
-
-
-; CHECK-NOT: [openmp-opt] Basic block @kernel entry is executed by a single thread.
-; CHECK: [openmp-opt] Basic block @kernel if.then is executed by a single thread.
-; CHECK-NOT: [openmp-opt] Basic block @kernel if.else is executed by a single thread.
-; CHECK-NOT: [openmp-opt] Basic block @kernel if.end is executed by a single thread.
-define void @kernel() {
-  %call = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 false, i1 false)
-  %cmp = icmp eq i32 %call, -1
-  br i1 %cmp, label %if.then, label %if.else
-if.then:
+define weak void @kernel() {
+  call void @__kmpc_kernel_init(i32 512, i16 1)
   call void @nvptx()
   call void @amdgcn()
-  br label %if.end
-if.else:
-  br label %if.end
-if.end:
-  call void @__kmpc_target_deinit(%struct.ident_t* null, i1 false, i1 true)
   ret void
 }
 
 ; REMARKS: remark: single_threaded_execution.c:1:0: Could not internalize function. Some optimizations may not be possible.
 ; REMARKS-NOT: remark: single_threaded_execution.c:1:0: Could not internalize function. Some optimizations may not be possible.
 
-; CHECK-DAG: [openmp-opt] Basic block @nvptx entry is executed by a single thread.
-; CHECK-DAG: [openmp-opt] Basic block @nvptx if.then is executed by a single thread.
-; CHECK-DAG: [openmp-opt] Basic block @nvptx if.end is executed by a single thread.
+; CHECK-NOT: [openmp-opt] Basic block @nvptx entry is executed by a single thread.
+; CHECK: [openmp-opt] Basic block @nvptx if.then is executed by a single thread.
+; CHECK-NOT: [openmp-opt] Basic block @nvptx if.end is executed by a single thread.
 ; Function Attrs: noinline
 define internal void @nvptx() {
 entry:
-  br i1 true, label %if.then, label %if.end
+  %call = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %cmp = icmp eq i32 %call, 0
+  br i1 %cmp, label %if.then, label %if.end
 
 if.then:
   call void @foo()
@@ -50,13 +34,15 @@ if.end:
   ret void
 }
 
-; CHECK-DAG: [openmp-opt] Basic block @amdgcn entry is executed by a single thread.
-; CHECK-DAG: [openmp-opt] Basic block @amdgcn if.then is executed by a single thread.
-; CHECK-DAG: [openmp-opt] Basic block @amdgcn if.end is executed by a single thread.
+; CHECK-NOT: [openmp-opt] Basic block @amdgcn entry is executed by a single thread.
+; CHECK: [openmp-opt] Basic block @amdgcn if.then is executed by a single thread.
+; CHECK-NOT: [openmp-opt] Basic block @amdgcn if.end is executed by a single thread.
 ; Function Attrs: noinline
 define internal void @amdgcn() {
 entry:
-  br i1 false, label %if.then, label %if.end
+  %call = call i32 @llvm.amdgcn.workitem.id.x()
+  %cmp = icmp eq i32 %call, 0
+  br i1 %cmp, label %if.then, label %if.end
 
 if.then:
   call void @foo()
@@ -101,10 +87,7 @@ declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
 
 declare i32 @llvm.amdgcn.workitem.id.x()
 
-declare void @__kmpc_kernel_prepare_parallel(i8*)
-
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
-declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
+declare void @__kmpc_kernel_init(i32, i16)
 
 attributes #0 = { cold noinline }
 

diff  --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
deleted file mode 100644
index 6ecda643acdb..000000000000
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ /dev/null
@@ -1,214 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
-; RUN: opt -S -passes=openmp-opt < %s | FileCheck %s
-
-;; void unknown(void);
-;; void spmd_amenable(void) __attribute__((assume("ompx_spmd_amenable")))
-;;
-;; void sequential_loop() {
-;;   #pragma omp target teams
-;;   {
-;;     for (int i = 0; i < 100; ++i) {
-;;       #pragma omp parallel
-;;       {
-;;         unknown();
-;;       }
-;;     }
-;      spmd_amenable();
-;;   }
-;; }
-
-target triple = "nvptx64"
-
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
-
- at 0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
- at 1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i32 0, i32 0) }, align 8
- at __omp_offloading_2c_38c77_sequential_loop_l4_exec_mode = weak constant i8 1
- at llvm.compiler.used = appending global [1 x i8*] [i8* @__omp_offloading_2c_38c77_sequential_loop_l4_exec_mode], section "llvm.metadata"
-
-; The second argument of __kmpc_target_init and deinit is is set to true to indicate that we can run in SPMD mode.
-; We also adjusted the global __omp_offloading_2c_38c77_sequential_loop_l4_exec_mode to have a zero initializer (which indicates SPMD mode to the runtime).
-;.
-; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
-; CHECK: @[[__OMP_OFFLOADING_2C_38C77_SEQUENTIAL_LOOP_L4_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 0
-; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_2c_38c77_sequential_loop_l4_exec_mode], section "llvm.metadata"
-;.
-define weak void @__omp_offloading_2c_38c77_sequential_loop_l4() #0 {
-; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2c_38c77_sequential_loop_l4
-; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i1 true, i1 false, i1 true)
-; CHECK-NEXT:    [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
-; CHECK-NEXT:    br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
-; CHECK:       user_code.entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4
-; CHECK-NEXT:    call void @__omp_outlined__(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(4) [[DOTTHREADID_TEMP_]], i32* noalias nocapture noundef nonnull readnone align 4 dereferenceable(4) [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i1 true, i1 true)
-; CHECK-NEXT:    ret void
-; CHECK:       worker.exit:
-; CHECK-NEXT:    ret void
-;
-entry:
-  %.zero.addr = alloca i32, align 4
-  %.threadid_temp. = alloca i32, align 4
-  store i32 0, i32* %.zero.addr, align 4
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* @1, i1 false, i1 true, i1 true)
-  %exec_user_code = icmp eq i32 %0, -1
-  br i1 %exec_user_code, label %user_code.entry, label %worker.exit
-
-user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1)
-  store i32 %1, i32* %.threadid_temp., align 4
-  call void @__omp_outlined__(i32* %.threadid_temp., i32* %.zero.addr) #2
-  call void @__kmpc_target_deinit(%struct.ident_t* @1, i1 false, i1 true)
-  ret void
-
-worker.exit:                                      ; preds = %entry
-  ret void
-}
-
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1)
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__
-; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8
-; CHECK-NEXT:    br label [[FOR_COND:%.*]]
-; CHECK:       for.cond:
-; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 100
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
-; CHECK-NEXT:    br label [[FOR_INC]]
-; CHECK:       for.inc:
-; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
-; CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
-; CHECK:       for.end:
-; CHECK-NEXT:    call void @spmd_amenable()
-; CHECK-NEXT:    ret void
-;
-entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.inc, %entry
-  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
-  %cmp = icmp slt i32 %i.0, 100
-  br i1 %cmp, label %for.body, label %for.end
-
-for.body:                                         ; preds = %for.cond
-  %0 = load i32, i32* %.global_tid., align 4
-  %1 = bitcast [0 x i8*]* %captured_vars_addrs to i8**
-  call void @__kmpc_parallel_51(%struct.ident_t* @1, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** %1, i64 0)
-  br label %for.inc
-
-for.inc:                                          ; preds = %for.body
-  %inc = add nsw i32 %i.0, 1
-  br label %for.cond, !llvm.loop !6
-
-for.end:                                          ; preds = %for.cond
-  call void @spmd_amenable()
-  ret void
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__1(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #0 {
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1
-; CHECK-SAME: (i32* noalias nocapture nofree readnone [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @unknown() #[[ATTR4:[0-9]+]]
-; CHECK-NEXT:    ret void
-;
-entry:
-  call void @unknown() #3
-  ret void
-}
-
-; Function Attrs: convergent
-declare void @unknown() #1
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #0 {
-; CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper
-; CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
-; CHECK-NEXT:    store i32 0, i32* [[DOTZERO_ADDR]], align 4
-; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DOTADDR1]], align 4
-; CHECK-NEXT:    call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
-; CHECK-NEXT:    call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]]
-; CHECK-NEXT:    ret void
-;
-entry:
-  %.addr1 = alloca i32, align 4
-  %.zero.addr = alloca i32, align 4
-  %global_args = alloca i8**, align 8
-  store i32 0, i32* %.zero.addr, align 4
-  store i32 %1, i32* %.addr1, align 4
-  call void @__kmpc_get_shared_variables(i8*** %global_args)
-  call void @__omp_outlined__1(i32* %.addr1, i32* %.zero.addr) #2
-  ret void
-}
-
-declare void @__kmpc_get_shared_variables(i8***)
-
-declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64)
-
-; Function Attrs: nounwind
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #2
-
-declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
-
-declare void @spmd_amenable() #4
-
-attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #2 = { nounwind }
-attributes #3 = { convergent }
-attributes #4 = { "llvm.assume"="ompx_spmd_amenable" }
-
-!omp_offload.info = !{!0}
-!nvvm.annotations = !{!1}
-!llvm.module.flags = !{!2, !3, !4, !8, !9}
-!llvm.ident = !{!5}
-
-!0 = !{i32 0, i32 44, i32 232567, !"sequential_loop", i32 4, i32 0}
-!1 = !{void ()* @__omp_offloading_2c_38c77_sequential_loop_l4, !"kernel", i32 1}
-!2 = !{i32 1, !"wchar_size", i32 4}
-!3 = !{i32 7, !"PIC Level", i32 2}
-!4 = !{i32 7, !"frame-pointer", i32 2}
-!5 = !{!"clang version 13.0.0"}
-!6 = distinct !{!6, !7}
-!7 = !{!"llvm.loop.mustprogress"}
-!8 = !{i32 7, !"openmp", i32 50}
-!9 = !{i32 7, !"openmp-device", i32 50}
-;.
-; CHECK: attributes #[[ATTR0]] = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-; CHECK: attributes #[[ATTR1:[0-9]+]] = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-; CHECK: attributes #[[ATTR2]] = { nounwind }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="ompx_spmd_amenable" }
-; CHECK: attributes #[[ATTR4]] = { convergent }
-;.
-; CHECK: [[META0:![0-9]+]] = !{i32 0, i32 44, i32 232567, !"sequential_loop", i32 4, i32 0}
-; CHECK: [[META1:![0-9]+]] = !{void ()* @__omp_offloading_2c_38c77_sequential_loop_l4, !"kernel", i32 1}
-; CHECK: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-; CHECK: [[META3:![0-9]+]] = !{i32 7, !"PIC Level", i32 2}
-; CHECK: [[META4:![0-9]+]] = !{i32 7, !"frame-pointer", i32 2}
-; CHECK: [[META5:![0-9]+]] = !{i32 7, !"openmp", i32 50}
-; CHECK: [[META6:![0-9]+]] = !{i32 7, !"openmp-device", i32 50}
-; CHECK: [[META7:![0-9]+]] = !{!"clang version 13.0.0"}
-; CHECK: [[LOOP8]] = distinct !{!8, !9}
-; CHECK: [[META9:![0-9]+]] = !{!"llvm.loop.mustprogress"}
-;.

diff  --git a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll b/llvm/test/Transforms/OpenMP/spmdization_remarks.ll
deleted file mode 100644
index 4d77063e978d..000000000000
--- a/llvm/test/Transforms/OpenMP/spmdization_remarks.ll
+++ /dev/null
@@ -1,233 +0,0 @@
-; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -pass-remarks-analysis=openmp-opt -disable-output < %s 2>&1 | FileCheck %s
-target triple = "nvptx64"
-
-; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:13:5: Kernel will be executed in generic-mode due to this potential side-effect, consider to add `__attribute__((assume("ompx_spmd_amenable"))` to the called function 'unknown'.
-; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:15:5: Kernel will be executed in generic-mode due to this potential side-effect, consider to add `__attribute__((assume("ompx_spmd_amenable"))` to the called function 'unknown'.
-; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:11:1: Generic-mode kernel is executed with a customized state machine that requires a fallback [1 known parallel regions, 2 unkown parallel regions] (bad).
-; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:13:5: State machine fallback caused by this call. If it is a false positive, use `__attribute__((assume("omp_no_openmp"))` (or "omp_no_parallelism").
-; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:15:5: State machine fallback caused by this call. If it is a false positive, use `__attribute__((assume("omp_no_openmp"))` (or "omp_no_parallelism").
-; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:20:1: Generic-mode kernel is changed to SPMD-mode.
-
-;; void unknown(void);
-;; void known(void) {
-;;   #pragma omp parallel
-;;   {
-;;     unknown();
-;;   }
-;; }
-;; 
-;; void test_fallback(void) {
-;;   #pragma omp target teams
-;;   {
-;;     unknown();
-;;     known();
-;;     unknown();
-;;   }
-;; }
-;;
-;; void no_openmp(void) __attribute__((assume("omp_no_openmp")));
-;; void test_no_fallback(void) {
-;;   #pragma omp target teams
-;;   {
-;;     known();
-;;     known();
-;;     known();
-;;     spmd_amenable();
-;;   }
-;; }
-
-%struct.ident_t = type { i32, i32, i32, i32, i8* }
-
- at 0 = private unnamed_addr constant [103 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
- at 1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @0, i32 0, i32 0) }, align 8
- at 2 = private unnamed_addr constant [72 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;test_fallback;11;1;;\00", align 1
- at 3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([72 x i8], [72 x i8]* @2, i32 0, i32 0) }, align 8
- at 4 = private unnamed_addr constant [104 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1
- at 5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([104 x i8], [104 x i8]* @4, i32 0, i32 0) }, align 8
- at __omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1
- at 6 = private unnamed_addr constant [106 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1
- at 7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([106 x i8], [106 x i8]* @6, i32 0, i32 0) }, align 8
- at 8 = private unnamed_addr constant [75 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;test_no_fallback;20;1;;\00", align 1
- at 9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([75 x i8], [75 x i8]* @8, i32 0, i32 0) }, align 8
- at 10 = private unnamed_addr constant [107 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1
- at 11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([107 x i8], [107 x i8]* @10, i32 0, i32 0) }, align 8
- at __omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1
- at 12 = private unnamed_addr constant [63 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;known;4;1;;\00", align 1
- at 13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([63 x i8], [63 x i8]* @12, i32 0, i32 0) }, align 8
- at G = external global i32
- at llvm.compiler.used = appending global [2 x i8*] [i8* @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, i8* @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata"
-
-; Function Attrs: convergent norecurse nounwind
-define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
-entry:
-  %captured_vars_addrs.i.i = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i1 false, i1 true, i1 true) #3, !dbg !18
-  %exec_user_code = icmp eq i32 %0, -1, !dbg !18
-  br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18
-
-common.ret:                                       ; preds = %entry, %user_code.entry
-  ret void, !dbg !19
-
-user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @3) #3
-  call void @unknown() #6, !dbg !20
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs.i.i to i8*
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i.i, i64 0, i64 0, !dbg !23
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26
-  call void @unknown() #6, !dbg !27
-  call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i1 false, i1 true) #3, !dbg !28
-  br label %common.ret
-}
-
-declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1, i1) local_unnamed_addr
-
-; Function Attrs: convergent
-declare void @unknown() local_unnamed_addr #1
-
-; Function Attrs: nounwind
-define hidden void @known() local_unnamed_addr #2 !dbg !29 {
-entry:
-  %captured_vars_addrs = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @13)
-  %1 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0, !dbg !30
-  call void @__kmpc_parallel_51(%struct.ident_t* nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** nonnull %1, i64 0) #3, !dbg !30
-  ret void, !dbg !31
-}
-
-; Function Attrs: nounwind
-declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3
-
-declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1) local_unnamed_addr
-
-; Function Attrs: norecurse nounwind
-define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
-entry:
-  %captured_vars_addrs.i2.i = alloca [0 x i8*], align 8
-  %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i1 false, i1 true, i1 true) #3, !dbg !33
-  %exec_user_code = icmp eq i32 %0, -1, !dbg !33
-  br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33
-
-common.ret:                                       ; preds = %entry, %user_code.entry
-  ret void, !dbg !34
-
-user_code.entry:                                  ; preds = %entry
-  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @9) #3
-  %2 = bitcast [0 x i8*]* %captured_vars_addrs.i2.i to i8*
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i2.i, i64 0, i64 0, !dbg !35
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !35
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !39
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %5, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !40
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !42
-  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
-  %6 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
-  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %6, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !43
-  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45
-  call void @spmd_amenable()
-  call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i1 false, i1 true) #3, !dbg !46
-  br label %common.ret
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__2(i32* noalias nocapture nofree readnone %.global_tid., i32* noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 {
-entry:
-  call void @unknown() #6, !dbg !48
-  ret void, !dbg !49
-}
-
-; Function Attrs: convergent norecurse nounwind
-define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 !dbg !50 {
-entry:
-  %global_args = alloca i8**, align 8
-  call void @__kmpc_get_shared_variables(i8*** nonnull %global_args) #3, !dbg !51
-  call void @unknown() #6, !dbg !52
-  ret void, !dbg !51
-}
-
-declare void @__kmpc_get_shared_variables(i8***) local_unnamed_addr
-
-declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr
-
-; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5
-
-; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5
-
-declare void @spmd_amenable() #7
-
-attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #2 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #3 = { nounwind }
-attributes #4 = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-attributes #5 = { argmemonly nofree nosync nounwind willreturn }
-attributes #6 = { convergent nounwind }
-attributes #7 = { "llvm.assume"="ompx_spmd_amenable" }
-
-!llvm.dbg.cu = !{!0}
-!omp_offload.info = !{!3, !4}
-!nvvm.annotations = !{!5, !6}
-!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
-!llvm.ident = !{!14}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: DebugDirectivesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None)
-!1 = !DIFile(filename: "spmdization_remarks.c", directory: "/data/src/llvm-project")
-!2 = !{}
-!3 = !{i32 0, i32 42, i32 14159165, !"test_no_fallback", i32 20, i32 1}
-!4 = !{i32 0, i32 42, i32 14159165, !"test_fallback", i32 11, i32 0}
-!5 = !{void ()* @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1}
-!6 = !{void ()* @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1}
-!7 = !{i32 7, !"Dwarf Version", i32 2}
-!8 = !{i32 2, !"Debug Info Version", i32 3}
-!9 = !{i32 1, !"wchar_size", i32 4}
-!10 = !{i32 7, !"openmp", i32 50}
-!11 = !{i32 7, !"openmp-device", i32 50}
-!12 = !{i32 7, !"PIC Level", i32 2}
-!13 = !{i32 7, !"frame-pointer", i32 2}
-!14 = !{!"clang version 13.0.0"}
-!15 = distinct !DISubprogram(name: "__omp_offloading_2a_d80d3d_test_fallback_l11", scope: !16, file: !16, line: 11, type: !17, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!16 = !DIFile(filename: "llvm/test/Transforms/OpenMP/spmdization_remarks.c", directory: "/data/src/llvm-project")
-!17 = !DISubroutineType(types: !2)
-!18 = !DILocation(line: 11, column: 1, scope: !15)
-!19 = !DILocation(line: 0, scope: !15)
-!20 = !DILocation(line: 13, column: 5, scope: !21, inlinedAt: !22)
-!21 = distinct !DISubprogram(name: "__omp_outlined__", scope: !16, file: !16, line: 11, type: !17, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!22 = distinct !DILocation(line: 11, column: 1, scope: !15)
-!23 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !25)
-!24 = distinct !DISubprogram(name: "known", scope: !16, file: !16, line: 3, type: !17, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!25 = distinct !DILocation(line: 14, column: 5, scope: !21, inlinedAt: !22)
-!26 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !25)
-!27 = !DILocation(line: 15, column: 5, scope: !21, inlinedAt: !22)
-!28 = !DILocation(line: 11, column: 25, scope: !15)
-!29 = distinct !DISubprogram(name: "known", scope: !16, file: !16, line: 3, type: !17, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!30 = !DILocation(line: 4, column: 1, scope: !29)
-!31 = !DILocation(line: 8, column: 1, scope: !29)
-!32 = distinct !DISubprogram(name: "__omp_offloading_2a_d80d3d_test_no_fallback_l20", scope: !16, file: !16, line: 20, type: !17, scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!33 = !DILocation(line: 20, column: 1, scope: !32)
-!34 = !DILocation(line: 0, scope: !32)
-!35 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !36)
-!36 = distinct !DILocation(line: 22, column: 5, scope: !37, inlinedAt: !38)
-!37 = distinct !DISubprogram(name: "__omp_outlined__1", scope: !16, file: !16, line: 20, type: !17, scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!38 = distinct !DILocation(line: 20, column: 1, scope: !32)
-!39 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !36)
-!40 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !41)
-!41 = distinct !DILocation(line: 23, column: 5, scope: !37, inlinedAt: !38)
-!42 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !41)
-!43 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !44)
-!44 = distinct !DILocation(line: 24, column: 5, scope: !37, inlinedAt: !38)
-!45 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !44)
-!46 = !DILocation(line: 20, column: 25, scope: !32)
-!47 = distinct !DISubprogram(name: "__omp_outlined__2", scope: !16, file: !16, line: 4, type: !17, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!48 = !DILocation(line: 6, column: 5, scope: !47)
-!49 = !DILocation(line: 7, column: 3, scope: !47)
-!50 = distinct !DISubprogram(linkageName: "__omp_outlined__2_wrapper", scope: !16, file: !16, line: 4, type: !17, scopeLine: 4, flags: DIFlagArtificial, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
-!51 = !DILocation(line: 4, column: 1, scope: !50)
-!52 = !DILocation(line: 6, column: 5, scope: !47, inlinedAt: !53)
-!53 = distinct !DILocation(line: 4, column: 1, scope: !50)

diff  --git a/openmp/libomptarget/deviceRTLs/common/include/target.h b/openmp/libomptarget/deviceRTLs/common/include/target.h
deleted file mode 100644
index 997e93b924e2..000000000000
--- a/openmp/libomptarget/deviceRTLs/common/include/target.h
+++ /dev/null
@@ -1,94 +0,0 @@
-//===-- target.h ---------- OpenMP device runtime target implementation ---===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Target region interfaces are simple interfaces designed to allow middle-end
-// (=LLVM) passes to analyze and transform the code. To achieve good performance
-// it may be required to run the associated passes. However, implementations of
-// this interface shall always provide a correct implementation as close to the
-// user expected code as possible.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_OPENMP_LIBOMPTARGET_DEVICERTLS_COMMON_TARGET_H
-#define LLVM_OPENMP_LIBOMPTARGET_DEVICERTLS_COMMON_TARGET_H
-
-#include <stdint.h>
-
-extern "C" {
-
-/// Forward declaration of the source location identifier "ident".
-typedef struct ident ident_t;
-
-/// The target region _kernel_ interface for GPUs
-///
-/// This deliberatly simple interface provides the middle-end (=LLVM) with
-/// easier means to reason about the semantic of the code and transform it as
-/// well. The runtime calls are therefore also desiged to carry sufficient
-/// information necessary for optimizations.
-///
-///
-/// Intended usage:
-///
-/// \code
-/// void kernel(...) {
-///   ThreadKind = __kmpc_target_init(Ident, /* IsSPMD */ false,
-///                                   /* UseGenericStateMachine */ true,
-///                                   /* RequiresFullRuntime */ ... );
-///   if (ThreadKind == -1) {
-///     // User defined kernel code.
-///   }
-///   __kmpc_target_deinit(...);
-/// }
-/// \endcode
-///
-/// Which can be transformed to:
-///
-/// \code
-/// void kernel(...) {
-///   ThreadKind = __kmpc_target_init(Ident, /* IsSPMD */ false,
-///                                   /* UseGenericStateMachine */ false,
-///                                   /* RequiresFullRuntime */ ... );
-///   if (ThreadKind == -1) {
-///     // User defined kernel code.
-///   } else {
-///     assume(ThreadKind == ThreadId);
-///     // Custom, kernel-specific state machine code.
-///   }
-///   __kmpc_target_deinit(...);
-/// }
-/// \endcode
-///
-///
-///{
-
-/// Initialization
-///
-/// Must be called by all threads.
-///
-/// \param Ident               Source location identification, can be NULL.
-///
-int32_t __kmpc_target_init(ident_t *Ident, bool IsSPMD,
-                           bool UseGenericStateMachine,
-                           bool RequiresFullRuntime);
-
-/// De-Initialization
-///
-/// Must be called by the main thread in generic mode, can be called by all
-/// threads. Must be called by all threads in SPMD mode.
-///
-/// In non-SPMD, this function releases the workers trapped in a state machine
-/// and also any memory dynamically allocated by the runtime.
-///
-/// \param Ident Source location identification, can be NULL.
-///
-void __kmpc_target_deinit(ident_t *Ident, bool IsSPMD,
-                          bool RequiresFullRuntime);
-
-///}
-}
-#endif

diff  --git a/openmp/libomptarget/deviceRTLs/common/src/loop.cu b/openmp/libomptarget/deviceRTLs/common/src/loop.cu
index 709905724192..04447be28db1 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/loop.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/loop.cu
@@ -204,15 +204,15 @@ public:
   INLINE static void dispatch_init(kmp_Ident *loc, int32_t threadId,
                                    kmp_sched_t schedule, T lb, T ub, ST st,
                                    ST chunk) {
-    if (isRuntimeUninitialized()) {
+    if (checkRuntimeUninitialized(loc)) {
       // In SPMD mode no need to check parallelism level - dynamic scheduling
       // may appear only in L2 parallel regions with lightweight runtime.
-      ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(), "Expected non-SPMD mode.");
+      ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected non-SPMD mode.");
       return;
     }
-    int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
+    int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
     omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(tid);
-    T tnum = GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode());
+    T tnum = GetNumberOfOmpThreads(checkSPMDMode(loc));
     T tripCount = ub - lb + 1; // +1 because ub is inclusive
     ASSERT0(LT_FUSSY, threadId < tnum,
             "current thread is not needed here; error");
@@ -441,10 +441,10 @@ public:
 
   INLINE static int dispatch_next(kmp_Ident *loc, int32_t gtid, int32_t *plast,
                                   T *plower, T *pupper, ST *pstride) {
-    if (isRuntimeUninitialized()) {
+    if (checkRuntimeUninitialized(loc)) {
       // In SPMD mode no need to check parallelism level - dynamic scheduling
       // may appear only in L2 parallel regions with lightweight runtime.
-      ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(), "Expected non-SPMD mode.");
+      ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected non-SPMD mode.");
       if (*plast)
         return DISPATCH_FINISHED;
       *plast = 1;
@@ -453,8 +453,8 @@ public:
     // ID of a thread in its own warp
 
     // automatically selects thread or warp ID based on selected implementation
-    int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
-    ASSERT0(LT_FUSSY, gtid < GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode()),
+    int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
+    ASSERT0(LT_FUSSY, gtid < GetNumberOfOmpThreads(checkSPMDMode(loc)),
             "current thread is not needed here; error");
     // retrieve schedule
     kmp_sched_t schedule =
@@ -624,7 +624,7 @@ EXTERN void __kmpc_for_static_init_4(kmp_Ident *loc, int32_t global_tid,
   PRINT0(LD_IO, "call kmpc_for_static_init_4\n");
   omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
       global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
-      __kmpc_is_spmd_exec_mode());
+      checkSPMDMode(loc));
 }
 
 EXTERN void __kmpc_for_static_init_4u(kmp_Ident *loc, int32_t global_tid,
@@ -635,7 +635,7 @@ EXTERN void __kmpc_for_static_init_4u(kmp_Ident *loc, int32_t global_tid,
   PRINT0(LD_IO, "call kmpc_for_static_init_4u\n");
   omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
       global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
-      __kmpc_is_spmd_exec_mode());
+      checkSPMDMode(loc));
 }
 
 EXTERN void __kmpc_for_static_init_8(kmp_Ident *loc, int32_t global_tid,
@@ -646,7 +646,7 @@ EXTERN void __kmpc_for_static_init_8(kmp_Ident *loc, int32_t global_tid,
   PRINT0(LD_IO, "call kmpc_for_static_init_8\n");
   omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
       global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
-      __kmpc_is_spmd_exec_mode());
+      checkSPMDMode(loc));
 }
 
 EXTERN void __kmpc_for_static_init_8u(kmp_Ident *loc, int32_t global_tid,
@@ -657,7 +657,7 @@ EXTERN void __kmpc_for_static_init_8u(kmp_Ident *loc, int32_t global_tid,
   PRINT0(LD_IO, "call kmpc_for_static_init_8u\n");
   omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
       global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
-      __kmpc_is_spmd_exec_mode());
+      checkSPMDMode(loc));
 }
 
 EXTERN

diff  --git a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
index 34af243fab54..c117c7e00bf2 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
@@ -12,7 +12,6 @@
 #pragma omp declare target
 
 #include "common/omptarget.h"
-#include "common/support.h"
 #include "target_impl.h"
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -27,18 +26,16 @@ extern omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext,
 // init entry points
 ////////////////////////////////////////////////////////////////////////////////
 
-static void __kmpc_generic_kernel_init() {
+EXTERN void __kmpc_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime) {
   PRINT(LD_IO, "call to __kmpc_kernel_init with version %f\n",
         OMPTARGET_NVPTX_VERSION);
-
-  if (GetLaneId() == 0)
-    parallelLevel[GetWarpId()] = 0;
+  ASSERT0(LT_FUSSY, RequiresOMPRuntime,
+          "Generic always requires initialized runtime.");
+  setExecutionParameters(Generic, RuntimeInitialized);
+  for (int I = 0; I < MAX_THREADS_PER_TEAM / WARPSIZE; ++I)
+    parallelLevel[I] = 0;
 
   int threadIdInBlock = GetThreadIdInBlock();
-  if (threadIdInBlock != GetMasterThreadID())
-    return;
-
-  setExecutionParameters(Generic, RuntimeInitialized);
   ASSERT0(LT_FUSSY, threadIdInBlock == GetMasterThreadID(),
           "__kmpc_kernel_init() must be called by team master warp only!");
   PRINT0(LD_IO, "call to __kmpc_kernel_init for master\n");
@@ -50,7 +47,7 @@ static void __kmpc_generic_kernel_init() {
       omptarget_nvptx_device_State[slot].Dequeue();
 
   // init thread private
-  int threadId = 0;
+  int threadId = GetLogicalThreadIdInBlock(/*isSPMDExecutionMode=*/false);
   omptarget_nvptx_threadPrivateContext->InitThreadPrivateContext(threadId);
 
   // init team context
@@ -65,17 +62,20 @@ static void __kmpc_generic_kernel_init() {
   // set number of threads and thread limit in team to started value
   omptarget_nvptx_TaskDescr *currTaskDescr =
       omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(threadId);
-  nThreads = GetNumberOfWorkersInTeam();
-  threadLimit = nThreads;
+  nThreads = GetNumberOfThreadsInBlock();
+  threadLimit = ThreadLimit;
 
-  omptarget_nvptx_globalArgs.Init();
+  if (!__kmpc_is_spmd_exec_mode())
+    omptarget_nvptx_globalArgs.Init();
 
   __kmpc_data_sharing_init_stack();
   __kmpc_impl_target_init();
 }
 
-static void __kmpc_generic_kernel_deinit() {
+EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized) {
   PRINT0(LD_IO, "call to __kmpc_kernel_deinit\n");
+  ASSERT0(LT_FUSSY, IsOMPRuntimeInitialized,
+          "Generic always requires initialized runtime.");
   // Enqueue omp state object for use by another team.
   int slot = usedSlotIdx;
   omptarget_nvptx_device_State[slot].Enqueue(
@@ -84,11 +84,12 @@ static void __kmpc_generic_kernel_deinit() {
   omptarget_nvptx_workFn = 0;
 }
 
-static void __kmpc_spmd_kernel_init(bool RequiresFullRuntime) {
+EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit,
+                                    int16_t RequiresOMPRuntime) {
   PRINT0(LD_IO, "call to __kmpc_spmd_kernel_init\n");
 
-  setExecutionParameters(Spmd, RequiresFullRuntime ? RuntimeInitialized
-                         : RuntimeUninitialized);
+  setExecutionParameters(Spmd, RequiresOMPRuntime ? RuntimeInitialized
+                                                  : RuntimeUninitialized);
   int threadId = GetThreadIdInBlock();
   if (threadId == 0) {
     usedSlotIdx = __kmpc_impl_smid() % MAX_SM;
@@ -99,8 +100,11 @@ static void __kmpc_spmd_kernel_init(bool RequiresFullRuntime) {
         1 + (GetNumberOfThreadsInBlock() > 1 ? OMP_ACTIVE_PARALLEL_LEVEL : 0);
   }
   __kmpc_data_sharing_init_stack();
-  if (!RequiresFullRuntime)
+  if (!RequiresOMPRuntime) {
+    // Runtime is not required - exit.
+    __kmpc_impl_syncthreads();
     return;
+  }
 
   //
   // Team Context Initialization.
@@ -134,17 +138,16 @@ static void __kmpc_spmd_kernel_init(bool RequiresFullRuntime) {
                                                              newTaskDescr);
 
   // init thread private from init value
-  int ThreadLimit = GetNumberOfProcsInTeam(/* IsSPMD */ true);
   PRINT(LD_PAR,
         "thread will execute parallel region with id %d in a team of "
         "%d threads\n",
         (int)newTaskDescr->ThreadId(), (int)ThreadLimit);
 }
 
-static void __kmpc_spmd_kernel_deinit(bool RequiresFullRuntime) {
+EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) {
   // We're not going to pop the task descr stack of each thread since
   // there are no more parallel regions in SPMD mode.
-  if (!RequiresFullRuntime)
+  if (!RequiresOMPRuntime)
     return;
 
   __kmpc_impl_syncthreads();
@@ -162,68 +165,4 @@ EXTERN int8_t __kmpc_is_spmd_exec_mode() {
   return (execution_param & ModeMask) == Spmd;
 }
 
-EXTERN bool __kmpc_kernel_parallel(void**WorkFn);
-
-static void __kmpc_target_region_state_machine(ident_t *Ident) {
-
-  int TId = GetThreadIdInBlock();
-  do {
-    void* WorkFn = 0;
-
-    // Wait for the signal that we have a new work function.
-    __kmpc_barrier_simple_spmd(Ident, TId);
-
-
-    // Retrieve the work function from the runtime.
-    bool IsActive = __kmpc_kernel_parallel(&WorkFn);
-
-    // If there is nothing more to do, break out of the state machine by
-    // returning to the caller.
-    if (!WorkFn)
-      return;
-
-    if (IsActive) {
-      ((void(*)(uint32_t,uint32_t))WorkFn)(0, TId);
-      __kmpc_kernel_end_parallel();
-    }
-
-    __kmpc_barrier_simple_spmd(Ident, TId);
-
-  } while (true);
-}
-
-EXTERN
-int32_t __kmpc_target_init(ident_t *Ident, bool IsSPMD,
-                           bool UseGenericStateMachine,
-                           bool RequiresFullRuntime) {
-  int TId = GetThreadIdInBlock();
-  if (IsSPMD)
-    __kmpc_spmd_kernel_init(RequiresFullRuntime);
-  else
-    __kmpc_generic_kernel_init();
-
-   if (IsSPMD) {
-    __kmpc_barrier_simple_spmd(Ident, TId);
-     return -1;
-   }
-
-   if (TId == GetMasterThreadID())
-     return -1;
-
-  if (UseGenericStateMachine)
-    __kmpc_target_region_state_machine(Ident);
-
-  return TId;
-}
-
-EXTERN
-void __kmpc_target_deinit(ident_t *Ident, bool IsSPMD,
-                           bool RequiresFullRuntime) {
-  if (IsSPMD)
-    __kmpc_spmd_kernel_deinit(RequiresFullRuntime);
-  else
-    __kmpc_generic_kernel_deinit();
-}
-
-
 #pragma omp end declare target

diff  --git a/openmp/libomptarget/deviceRTLs/common/src/parallel.cu b/openmp/libomptarget/deviceRTLs/common/src/parallel.cu
index d404015eb881..29a6db85d172 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/parallel.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/parallel.cu
@@ -181,14 +181,14 @@ EXTERN void __kmpc_serialized_parallel(kmp_Ident *loc, uint32_t global_tid) {
 
   IncParallelLevel(/*ActiveParallel=*/false, __kmpc_impl_activemask());
 
-  if (isRuntimeUninitialized()) {
-    ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(),
+  if (checkRuntimeUninitialized(loc)) {
+    ASSERT0(LT_FUSSY, checkSPMDMode(loc),
             "Expected SPMD mode with uninitialized runtime.");
     return;
   }
 
   // assume this is only called for nested parallel
-  int threadId = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
+  int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
 
   // unlike actual parallel, threads in the same team do not share
   // the workTaskDescr in this case and num threads is fixed to 1
@@ -220,14 +220,14 @@ EXTERN void __kmpc_end_serialized_parallel(kmp_Ident *loc,
 
   DecParallelLevel(/*ActiveParallel=*/false, __kmpc_impl_activemask());
 
-  if (isRuntimeUninitialized()) {
-    ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(),
+  if (checkRuntimeUninitialized(loc)) {
+    ASSERT0(LT_FUSSY, checkSPMDMode(loc),
             "Expected SPMD mode with uninitialized runtime.");
     return;
   }
 
   // pop stack
-  int threadId = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
+  int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
   omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(threadId);
   // set new top
   omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(
@@ -249,8 +249,8 @@ EXTERN uint16_t __kmpc_parallel_level(kmp_Ident *loc, uint32_t global_tid) {
 // it's cheap to recalculate this value so we never use the result
 // of this call.
 EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc) {
-  int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
-  return GetOmpThreadId(tid, __kmpc_is_spmd_exec_mode());
+  int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
+  return GetOmpThreadId(tid, checkSPMDMode(loc));
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -260,9 +260,9 @@ EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc) {
 EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t tid,
                                     int32_t num_threads) {
   PRINT(LD_IO, "call kmpc_push_num_threads %d\n", num_threads);
-  ASSERT0(LT_FUSSY, isRuntimeInitialized(),
+  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
           "Runtime must be initialized.");
-  tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
+  tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
   omptarget_nvptx_threadPrivateContext->NumThreadsForNextParallel(tid) =
       num_threads;
 }
@@ -331,7 +331,7 @@ EXTERN void __kmpc_parallel_51(kmp_Ident *ident, kmp_int32 global_tid,
         (1 + (IsActiveParallelRegion ? OMP_ACTIVE_PARALLEL_LEVEL : 0));
 
   // Master signals work to activate workers.
-  __kmpc_barrier_simple_spmd(ident, 0);
+  __kmpc_barrier_simple_spmd(nullptr, 0);
 
   // OpenMP [2.5, Parallel Construct, p.49]
   // There is an implied barrier at the end of a parallel region. After the
@@ -339,7 +339,7 @@ EXTERN void __kmpc_parallel_51(kmp_Ident *ident, kmp_int32 global_tid,
   // execution of the enclosing task region.
   //
   // The master waits at this barrier until all workers are done.
-  __kmpc_barrier_simple_spmd(ident, 0);
+  __kmpc_barrier_simple_spmd(nullptr, 0);
 
   // Decrement parallel level for non-SPMD warps.
   for (int I = 0; I < NumWarps; ++I)

diff  --git a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu
index da025f4acd11..4054a6e2a97b 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu
@@ -159,11 +159,11 @@ int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(
     kmp_InterWarpCopyFctPtr cpyFct) {
   return nvptx_parallel_reduce_nowait(
       global_tid, num_vars, reduce_size, reduce_data, shflFct, cpyFct,
-      __kmpc_is_spmd_exec_mode(), isRuntimeUninitialized());
+      checkSPMDMode(loc), checkRuntimeUninitialized(loc));
 }
 
 INLINE static bool isMaster(kmp_Ident *loc, uint32_t ThreadId) {
-  return !__kmpc_is_spmd_exec_mode() || IsTeamMaster(ThreadId);
+  return checkGenericMode(loc) || IsTeamMaster(ThreadId);
 }
 
 INLINE static uint32_t roundToWarpsize(uint32_t s) {
@@ -184,16 +184,16 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
     kmp_ListGlobalFctPtr glredFct) {
 
   // Terminate all threads in non-SPMD mode except for the master thread.
-  if (!__kmpc_is_spmd_exec_mode() && GetThreadIdInBlock() != GetMasterThreadID())
+  if (checkGenericMode(loc) && GetThreadIdInBlock() != GetMasterThreadID())
     return 0;
 
-  uint32_t ThreadId = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
+  uint32_t ThreadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
 
   // In non-generic mode all workers participate in the teams reduction.
   // In generic mode only the team master participates in the teams
   // reduction because the workers are waiting for parallel work.
   uint32_t NumThreads =
-      __kmpc_is_spmd_exec_mode() ? GetNumberOfOmpThreads(/*isSPMDExecutionMode=*/true)
+      checkSPMDMode(loc) ? GetNumberOfOmpThreads(/*isSPMDExecutionMode=*/true)
                          : /*Master thread only*/ 1;
   uint32_t TeamId = GetBlockIdInKernel();
   uint32_t NumTeams = GetNumberOfBlocksInKernel();
@@ -225,7 +225,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
     ChunkTeamCount = __kmpc_atomic_inc((uint32_t *)&Cnt, num_of_records - 1u);
   }
   // Synchronize
-  if (__kmpc_is_spmd_exec_mode())
+  if (checkSPMDMode(loc))
     __kmpc_barrier(loc, global_tid);
 
   // reduce_data is global or shared so before being reduced within the

diff  --git a/openmp/libomptarget/deviceRTLs/common/src/support.cu b/openmp/libomptarget/deviceRTLs/common/src/support.cu
index d711c2a9d708..0977e2867e6e 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/support.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/support.cu
@@ -34,6 +34,57 @@ bool isRuntimeInitialized() {
   return (execution_param & RuntimeMask) == RuntimeInitialized;
 }
 
+////////////////////////////////////////////////////////////////////////////////
+// Execution Modes based on location parameter fields
+////////////////////////////////////////////////////////////////////////////////
+
+bool checkSPMDMode(kmp_Ident *loc) {
+  if (!loc)
+    return __kmpc_is_spmd_exec_mode();
+
+  // If SPMD is true then we are not in the UNDEFINED state so
+  // we can return immediately.
+  if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
+    return true;
+
+  // If not in SPMD mode and runtime required is a valid
+  // combination of flags so we can return immediately.
+  if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
+    return false;
+
+  // We are in underfined state.
+  return __kmpc_is_spmd_exec_mode();
+}
+
+bool checkGenericMode(kmp_Ident *loc) { return !checkSPMDMode(loc); }
+
+bool checkRuntimeUninitialized(kmp_Ident *loc) {
+  if (!loc)
+    return isRuntimeUninitialized();
+
+  // If runtime is required then we know we can't be
+  // in the undefined mode. We can return immediately.
+  if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
+    return false;
+
+  // If runtime is required then we need to check is in
+  // SPMD mode or not. If not in SPMD mode then we end
+  // up in the UNDEFINED state that marks the orphaned
+  // functions.
+  if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
+    return true;
+
+  // Check if we are in an UNDEFINED state. Undefined is denoted by
+  // non-SPMD + noRuntimeRequired which is a combination that
+  // cannot actually happen. Undefined states is used to mark orphaned
+  // functions.
+  return isRuntimeUninitialized();
+}
+
+bool checkRuntimeInitialized(kmp_Ident *loc) {
+  return !checkRuntimeUninitialized(loc);
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // support: get info from machine
 ////////////////////////////////////////////////////////////////////////////////

diff  --git a/openmp/libomptarget/deviceRTLs/common/src/sync.cu b/openmp/libomptarget/deviceRTLs/common/src/sync.cu
index 0dfbacf68051..0a00f2fa08b8 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/sync.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/sync.cu
@@ -42,16 +42,16 @@ EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc_ref, int32_t tid) {
 }
 
 EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
-  if (isRuntimeUninitialized()) {
-    ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(),
+  if (checkRuntimeUninitialized(loc_ref)) {
+    ASSERT0(LT_FUSSY, checkSPMDMode(loc_ref),
             "Expected SPMD mode with uninitialized runtime.");
     __kmpc_barrier_simple_spmd(loc_ref, tid);
   } else {
-    tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
+    tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc_ref));
     int numberOfActiveOMPThreads =
-        GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode());
+        GetNumberOfOmpThreads(checkSPMDMode(loc_ref));
     if (numberOfActiveOMPThreads > 1) {
-      if (__kmpc_is_spmd_exec_mode()) {
+      if (checkSPMDMode(loc_ref)) {
         __kmpc_barrier_simple_spmd(loc_ref, tid);
       } else {
         // The #threads parameter must be rounded up to the WARPSIZE.

diff  --git a/openmp/libomptarget/deviceRTLs/common/src/task.cu b/openmp/libomptarget/deviceRTLs/common/src/task.cu
index 23470e796d72..a5186707dfb6 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/task.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/task.cu
@@ -83,7 +83,7 @@ EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
                                          void *noAliasDepList) {
   PRINT(LD_IO, "call to __kmpc_omp_task_with_deps(task 0x%llx)\n",
         P64(newKmpTaskDescr));
-  ASSERT0(LT_FUSSY, isRuntimeInitialized(),
+  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
           "Runtime must be initialized.");
   // 1. get explicit task descr from kmp task descr
   omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
@@ -96,7 +96,7 @@ EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
           "bad assumptions");
 
   // 2. push new context: update new task descriptor
-  int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
+  int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
   omptarget_nvptx_TaskDescr *parentTaskDescr = getMyTopTaskDescriptor(tid);
   newTaskDescr->CopyForExplicitTask(parentTaskDescr);
   // set new task descriptor as top
@@ -122,7 +122,7 @@ EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
                                       kmp_TaskDescr *newKmpTaskDescr) {
   PRINT(LD_IO, "call to __kmpc_omp_task_begin_if0(task 0x%llx)\n",
         (unsigned long long)newKmpTaskDescr);
-  ASSERT0(LT_FUSSY, isRuntimeInitialized(),
+  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
           "Runtime must be initialized.");
   // 1. get explicit task descr from kmp task descr
   omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
@@ -135,7 +135,7 @@ EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
           "bad assumptions");
 
   // 2. push new context: update new task descriptor
-  int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
+  int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
   omptarget_nvptx_TaskDescr *parentTaskDescr = getMyTopTaskDescriptor(tid);
   newTaskDescr->CopyForExplicitTask(parentTaskDescr);
   // set new task descriptor as top
@@ -148,7 +148,7 @@ EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
                                          kmp_TaskDescr *newKmpTaskDescr) {
   PRINT(LD_IO, "call to __kmpc_omp_task_complete_if0(task 0x%llx)\n",
         (unsigned long long)newKmpTaskDescr);
-  ASSERT0(LT_FUSSY, isRuntimeInitialized(),
+  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
           "Runtime must be initialized.");
   // 1. get explicit task descr from kmp task descr
   omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
@@ -163,7 +163,7 @@ EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
   omptarget_nvptx_TaskDescr *parentTaskDescr = newTaskDescr->GetPrevTaskDescr();
   // 3... noting to call... is inline
   // 4. pop context
-  int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
+  int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
   omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid,
                                                              parentTaskDescr);
   // 5. free

diff  --git a/openmp/libomptarget/deviceRTLs/common/support.h b/openmp/libomptarget/deviceRTLs/common/support.h
index 92a3f82df374..4e9ce39b3040 100644
--- a/openmp/libomptarget/deviceRTLs/common/support.h
+++ b/openmp/libomptarget/deviceRTLs/common/support.h
@@ -36,6 +36,15 @@ bool isGenericMode();
 bool isRuntimeUninitialized();
 bool isRuntimeInitialized();
 
+////////////////////////////////////////////////////////////////////////////////
+// Execution Modes based on location parameter fields
+////////////////////////////////////////////////////////////////////////////////
+
+bool checkSPMDMode(kmp_Ident *loc);
+bool checkGenericMode(kmp_Ident *loc);
+bool checkRuntimeUninitialized(kmp_Ident *loc);
+bool checkRuntimeInitialized(kmp_Ident *loc);
+
 ////////////////////////////////////////////////////////////////////////////////
 // get info from machine
 ////////////////////////////////////////////////////////////////////////////////

diff  --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h
index e0c433060c85..082b6b9d1109 100644
--- a/openmp/libomptarget/deviceRTLs/interface.h
+++ b/openmp/libomptarget/deviceRTLs/interface.h
@@ -416,11 +416,11 @@ EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid,
                              int32_t cancelVal);
 
 // non standard
-EXTERN int32_t __kmpc_target_init(ident_t *Ident, bool IsSPMD,
-                                 bool UseGenericStateMachine,
-                           bool RequiresFullRuntime);
-EXTERN void __kmpc_target_deinit(ident_t *Ident, bool IsSPMD,
-                           bool RequiresFullRuntime);
+EXTERN void __kmpc_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime);
+EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
+EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit,
+                                    int16_t RequiresOMPRuntime);
+EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
 EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn);
 EXTERN bool __kmpc_kernel_parallel(void **WorkFn);
 EXTERN void __kmpc_kernel_end_parallel();

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
index 35324f070e4d..eafa73426a95 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.cu
@@ -60,13 +60,7 @@ EXTERN __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
   return Mask;
 }
 
-EXTERN void __kmpc_impl_syncthreads() { 
-  int barrier = 2;
-  asm volatile("barrier.sync %0;"
-               :
-               : "r"(barrier)
-               : "memory");
-}
+EXTERN void __kmpc_impl_syncthreads() { __syncthreads(); }
 
 EXTERN void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) {
   __nvvm_bar_warp_sync(Mask);
@@ -81,7 +75,7 @@ EXTERN void __kmpc_impl_named_sync(uint32_t num_threads) {
   // The named barrier for active parallel threads of a team in an L1 parallel
   // region to synchronize with each other.
   int barrier = 1;
-  asm volatile("barrier.sync %0, %1;"
+  asm volatile("bar.sync %0, %1;"
                :
                : "r"(barrier), "r"(num_threads)
                : "memory");


        


More information about the Openmp-commits mailing list