[llvm] 73ece23 - Revert "[OpenMP][NFCI] Embed the source location string size in the ident_t"

Mehdi Amini via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 28 23:01:48 PST 2021


Author: Mehdi Amini
Date: 2021-12-29T06:57:36Z
New Revision: 73ece231ee0cf048d56841f47915beb1db6afc26

URL: https://github.com/llvm/llvm-project/commit/73ece231ee0cf048d56841f47915beb1db6afc26
DIFF: https://github.com/llvm/llvm-project/commit/73ece231ee0cf048d56841f47915beb1db6afc26.diff

LOG: Revert "[OpenMP][NFCI] Embed the source location string size in the ident_t"

This reverts commit 7bfcdbcbf368cea14a5236080af975d5878a46eb.
Broke MLIR build

Added: 
    

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/test/OpenMP/barrier_codegen.cpp
    clang/test/OpenMP/for_codegen.cpp
    clang/test/OpenMP/nvptx_SPMD_codegen.cpp
    clang/test/OpenMP/parallel_num_threads_codegen.cpp
    clang/test/OpenMP/parallel_proc_bind_codegen.cpp
    clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp
    clang/test/OpenMP/sections_codegen.cpp
    llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
    llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
    llvm/lib/Transforms/IPO/OpenMPOpt.cpp
    llvm/test/Transforms/OpenMP/deduplication.ll
    llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
    llvm/test/Transforms/OpenMP/parallel_region_merging.ll
    llvm/test/Transforms/OpenMP/remove_globalization.ll
    llvm/test/Transforms/OpenMP/spmdization.ll
    llvm/test/Transforms/OpenMP/spmdization_assumes.ll
    llvm/test/Transforms/OpenMP/spmdization_guarding.ll

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 40e2094ea4ce7..c314044c66dd7 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1429,11 +1429,10 @@ static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  unsigned Flags) {
-  uint32_t SrcLocStrSize;
   llvm::Constant *SrcLocStr;
   if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
       Loc.isInvalid()) {
-    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
+    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
   } else {
     std::string FunctionName;
     if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
@@ -1442,12 +1441,12 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
     const char *FileName = PLoc.getFilename();
     unsigned Line = PLoc.getLine();
     unsigned Column = PLoc.getColumn();
-    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
-                                                Column, SrcLocStrSize);
+    SrcLocStr =
+        OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, Column);
   }
   unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
-  return OMPBuilder.getOrCreateIdent(
-      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
+  return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags),
+                                     Reserved2Flags);
 }
 
 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
@@ -1458,11 +1457,10 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
   if (CGM.getLangOpts().OpenMPIRBuilder) {
     SmallString<128> Buffer;
     OMPBuilder.updateToLocation(CGF.Builder.saveIP());
-    uint32_t SrcLocStrSize;
     auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
-        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
+        getIdentStringFromSourceLocation(CGF, Loc, Buffer));
     return OMPBuilder.getOrCreateThreadID(
-        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
+        OMPBuilder.getOrCreateIdent(SrcLocStr));
   }
 
   llvm::Value *ThreadID = nullptr;
@@ -9529,9 +9527,8 @@ llvm::Constant *
 emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                        MappableExprsHandler::MappingExprInfo &MapExprs) {
 
-  uint32_t SrcLocStrSize;
   if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
-    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
+    return OMPBuilder.getOrCreateDefaultSrcLocStr();
 
   SourceLocation Loc;
   if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
@@ -9555,8 +9552,7 @@ emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
 
   PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
   return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
-                                         PLoc.getLine(), PLoc.getColumn(),
-                                         SrcLocStrSize);
+                                         PLoc.getLine(), PLoc.getColumn());
 }
 
 /// Emit the arrays used to pass the captures and map information to the

diff  --git a/clang/test/OpenMP/barrier_codegen.cpp b/clang/test/OpenMP/barrier_codegen.cpp
index 89704b36e13b5..35b2ed7212761 100644
--- a/clang/test/OpenMP/barrier_codegen.cpp
+++ b/clang/test/OpenMP/barrier_codegen.cpp
@@ -19,8 +19,8 @@
 #define HEADER
 
 // CHECK: [[IDENT_T:%.+]] = type { i32, i32, i32, i32, i8* }
-// CHECK-DAG: [[EXPLICIT_BARRIER_LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 34, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) }
-// CHECK-DAG: [[LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 2, i32 0, i32 {{[0-9]+}}, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) }
+// CHECK-DAG: [[EXPLICIT_BARRIER_LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) }
+// CHECK-DAG: [[LOC:@.+]] = {{.+}} [[IDENT_T]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* @{{.+}}, i32 0, i32 0) }
 
 void foo() {}
 

diff  --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp
index 813b1313f1ee8..64652e530f6a7 100644
--- a/clang/test/OpenMP/for_codegen.cpp
+++ b/clang/test/OpenMP/for_codegen.cpp
@@ -22,8 +22,8 @@
 // PROF-INSTR-PATH: constant [25 x i8] c"for_codegen-test.profraw\00"
 
 // CHECK: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
-// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 {{[0-9]+}}, i8*
-// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 514, i32 0, i32 {{[0-9]+}}, i8*
+// CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8*
+// CHECK-DAG: [[LOOP_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 514, i32 0, i32 0, i8*
 // CHECK-DAG: [[I:@.+]] ={{.*}} global i8 1,
 // CHECK-DAG: [[J:@.+]] ={{.*}} global i8 2,
 // CHECK-DAG: [[K:@.+]] ={{.*}} global i8 3,

diff  --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp
index c2e98eec0cd94..eebc41b44f849 100644
--- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp
@@ -11,13 +11,13 @@
 int a;
 
 // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1
-// CHECK-DAG: [[DISTR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds
-// CHECK-DAG: [[FOR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds
-// CHECK-DAG: [[LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds
-// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds
-// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds
-// CHECK-DAG: [[BAR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 3, i32 {{[0-9]+}}, i8* getelementptr inbounds
-// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 1, i32 {{[0-9]+}}, i8* getelementptr inbounds
+// CHECK-DAG: [[DISTR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 3, i32 0, i8* getelementptr inbounds
+// CHECK-DAG: [[FOR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 514, i32 3, i32 0, i8* getelementptr inbounds
+// CHECK-DAG: [[LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 3, i32 0, i8* getelementptr inbounds
+// CHECK-DAG: [[DISTR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2050, i32 1, i32 0, i8* getelementptr inbounds
+// CHECK-DAG: [[FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 1, i32 0, i8* getelementptr inbounds
+// CHECK-DAG: [[BAR_LIGHT:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 3, i32 0, i8* getelementptr inbounds
+// CHECK-DAG: [[BAR_FULL:@.+]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 66, i32 1, i32 0, i8* getelementptr inbounds
 // CHECK-NOT: @__omp_offloading_{{.+}}_exec_mode = weak constant i8 1
 
 void foo() {

diff  --git a/clang/test/OpenMP/parallel_num_threads_codegen.cpp b/clang/test/OpenMP/parallel_num_threads_codegen.cpp
index e22f730570354..0fdb6cfbcfefe 100644
--- a/clang/test/OpenMP/parallel_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/parallel_num_threads_codegen.cpp
@@ -15,7 +15,7 @@ typedef __INTPTR_TYPE__ intptr_t;
 // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
 // CHECK-DAG: [[S_TY:%.+]] = type { [[INTPTR_T_TY:i[0-9]+]], [[INTPTR_T_TY]], [[INTPTR_T_TY]] }
 // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
-// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
 
 void foo();
 

diff  --git a/clang/test/OpenMP/parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/parallel_proc_bind_codegen.cpp
index e6103335b778d..8b9e09191b24b 100644
--- a/clang/test/OpenMP/parallel_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/parallel_proc_bind_codegen.cpp
@@ -14,7 +14,7 @@ typedef __INTPTR_TYPE__ intptr_t;
 
 // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
 // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
-// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
 
 void foo();
 

diff  --git a/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp b/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp
index 98b7f0374345d..34de488c1dfc3 100644
--- a/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp
+++ b/clang/test/OpenMP/parallel_proc_bind_primary_codegen.cpp
@@ -15,7 +15,7 @@ typedef __INTPTR_TYPE__ intptr_t;
 
 // CHECK-DAG: [[IDENT_T_TY:%.+]] = type { i32, i32, i32, i32, i8* }
 // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
-// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+// CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant [[IDENT_T_TY]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
 
 void foo();
 

diff  --git a/clang/test/OpenMP/sections_codegen.cpp b/clang/test/OpenMP/sections_codegen.cpp
index f9cdc5cc7ed11..07b95f53fa62c 100644
--- a/clang/test/OpenMP/sections_codegen.cpp
+++ b/clang/test/OpenMP/sections_codegen.cpp
@@ -9,8 +9,8 @@
 // expected-no-diagnostics
 #ifndef HEADER
 #define HEADER
-// CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 {{[0-9]+}}, i8*
-// CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 1026, i32 0, i32 {{[0-9]+}}, i8*
+// CHECK-DAG: [[IMPLICIT_BARRIER_SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 194, i32 0, i32 0, i8*
+// CHECK-DAG: [[SECTIONS_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 1026, i32 0, i32 0, i8*
 // CHECK-LABEL: foo
 void foo() { extern void mayThrow(); mayThrow(); };
 // CHECK-LABEL: bar

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index fa92446492068..9976d1961ed11 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -663,31 +663,28 @@ class OpenMPIRBuilder {
   Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
 
   /// Return the (LLVM-IR) string describing the source location \p LocStr.
-  Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
+  Constant *getOrCreateSrcLocStr(StringRef LocStr);
 
   /// Return the (LLVM-IR) string describing the default source location.
-  Constant *getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize);
+  Constant *getOrCreateDefaultSrcLocStr();
 
   /// Return the (LLVM-IR) string describing the source location identified by
   /// the arguments.
   Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
-                                 unsigned Line, unsigned Column,
-                                 uint32_t &SrcLocStrSize);
+                                 unsigned Line, unsigned Column);
 
   /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
   /// fallback if \p DL does not specify the function name.
-  Constant *getOrCreateSrcLocStr(DebugLoc DL, uint32_t &SrcLocStrSize,
-                                 Function *F = nullptr);
+  Constant *getOrCreateSrcLocStr(DebugLoc DL, Function *F = nullptr);
 
   /// Return the (LLVM-IR) string describing the source location \p Loc.
-  Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
-                                 uint32_t &SrcLocStrSize);
+  Constant *getOrCreateSrcLocStr(const LocationDescription &Loc);
 
   /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
   /// TODO: Create a enum class for the Reserve2Flags
-  Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
-                             omp::IdentFlag Flags = omp::IdentFlag(0),
-                             unsigned Reserve2Flags = 0);
+  Value *getOrCreateIdent(Constant *SrcLocStr,
+                          omp::IdentFlag Flags = omp::IdentFlag(0),
+                          unsigned Reserve2Flags = 0);
 
   /// Create a global flag \p Namein the module with initial value \p Value.
   GlobalValue *createGlobalFlag(unsigned Value, StringRef Name);
@@ -757,7 +754,7 @@ class OpenMPIRBuilder {
   StringMap<Constant *> SrcLocStrMap;
 
   /// Map to remember existing ident_t*.
-  DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;
+  DenseMap<std::pair<Constant *, uint64_t>, Value *> IdentMap;
 
   /// Helper that contains information about regions we need to outline
   /// during finalization.

diff  --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index de2507631f00d..5157d51fd18ca 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -21,9 +21,7 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CFG.h"
-#include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/PassManager.h"
@@ -39,7 +37,6 @@
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 #include "llvm/Transforms/Utils/UnrollLoop.h"
 
-#include <cstdint>
 #include <sstream>
 
 #define DEBUG_TYPE "openmp-ir-builder"
@@ -258,21 +255,19 @@ GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) {
   return GV;
 }
 
-Constant *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
-                                            uint32_t SrcLocStrSize,
-                                            IdentFlag LocFlags,
-                                            unsigned Reserve2Flags) {
+Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
+                                         IdentFlag LocFlags,
+                                         unsigned Reserve2Flags) {
   // Enable "C-mode".
   LocFlags |= OMP_IDENT_FLAG_KMPC;
 
-  Constant *&Ident =
+  Value *&Ident =
       IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
   if (!Ident) {
     Constant *I32Null = ConstantInt::getNullValue(Int32);
-    Constant *IdentData[] = {I32Null,
-                             ConstantInt::get(Int32, uint32_t(LocFlags)),
-                             ConstantInt::get(Int32, Reserve2Flags),
-                             ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
+    Constant *IdentData[] = {
+        I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
+        ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
     Constant *Initializer =
         ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData);
 
@@ -295,12 +290,10 @@ Constant *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
     }
   }
 
-  return ConstantExpr::getPointerBitCastOrAddrSpaceCast(Ident, IdentPtr);
+  return Builder.CreatePointerCast(Ident, IdentPtr);
 }
 
-Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr,
-                                                uint32_t &SrcLocStrSize) {
-  SrcLocStrSize = LocStr.size();
+Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
   Constant *&SrcLocStr = SrcLocStrMap[LocStr];
   if (!SrcLocStr) {
     Constant *Initializer =
@@ -321,8 +314,8 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr,
 
 Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName,
                                                 StringRef FileName,
-                                                unsigned Line, unsigned Column,
-                                                uint32_t &SrcLocStrSize) {
+                                                unsigned Line,
+                                                unsigned Column) {
   SmallString<128> Buffer;
   Buffer.push_back(';');
   Buffer.append(FileName);
@@ -334,21 +327,17 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName,
   Buffer.append(std::to_string(Column));
   Buffer.push_back(';');
   Buffer.push_back(';');
-  return getOrCreateSrcLocStr(Buffer.str(), SrcLocStrSize);
+  return getOrCreateSrcLocStr(Buffer.str());
 }
 
-Constant *
-OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize) {
-  StringRef UnknownLoc = ";unknown;unknown;0;0;;";
-  return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
+Constant *OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
+  return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
 }
 
-Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL,
-                                                uint32_t &SrcLocStrSize,
-                                                Function *F) {
+Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, Function *F) {
   DILocation *DIL = DL.get();
   if (!DIL)
-    return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
+    return getOrCreateDefaultSrcLocStr();
   StringRef FileName = M.getName();
   if (DIFile *DIF = DIL->getFile())
     if (Optional<StringRef> Source = DIF->getSource())
@@ -357,13 +346,12 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL,
   if (Function.empty() && F)
     Function = F->getName();
   return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
-                              DIL->getColumn(), SrcLocStrSize);
+                              DIL->getColumn());
 }
 
-Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc,
-                                                uint32_t &SrcLocStrSize) {
-  return getOrCreateSrcLocStr(Loc.DL, SrcLocStrSize,
-                              Loc.IP.getBlock()->getParent());
+Constant *
+OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc) {
+  return getOrCreateSrcLocStr(Loc.DL, Loc.IP.getBlock()->getParent());
 }
 
 Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
@@ -405,11 +393,9 @@ OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
     break;
   }
 
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Args[] = {
-      getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
-      getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
+                   getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};
 
   // If we are in a cancellable parallel region, barriers are cancellation
   // points.
@@ -455,9 +441,8 @@ OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
     llvm_unreachable("Unknown cancel kind!");
   }
 
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
   Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
   Value *Result = Builder.CreateCall(
       getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
@@ -528,9 +513,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
   if (!updateToLocation(Loc))
     return Loc.IP;
 
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
   Value *ThreadID = getOrCreateThreadID(Ident);
 
   if (NumThreads) {
@@ -887,9 +871,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
 
 void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) {
   // Build call void __kmpc_flush(ident_t *loc)
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Args[] = {getOrCreateIdent(SrcLocStr)};
 
   Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
 }
@@ -903,9 +886,8 @@ void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) {
 void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) {
   // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
   // global_tid);
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
   Value *Args[] = {Ident, getOrCreateThreadID(Ident)};
 
   // Ignore return result until untied tasks are supported.
@@ -921,9 +903,8 @@ void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) {
 
 void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) {
   // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
   Constant *I32Null = ConstantInt::getNullValue(Int32);
   Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
 
@@ -1133,16 +1114,14 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
   Module *Module = Func->getParent();
   Value *RedArrayPtr =
       Builder.CreateBitCast(RedArray, Builder.getInt8PtrTy(), "red.array.ptr");
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
   bool CanGenerateAtomic =
       llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) {
         return RI.AtomicReductionGen;
       });
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
-                                  CanGenerateAtomic
-                                      ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
-                                      : IdentFlag(0));
+  Value *Ident = getOrCreateIdent(
+      SrcLocStr, CanGenerateAtomic ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
+                                   : IdentFlag(0));
   Value *ThreadId = getOrCreateThreadID(Ident);
   Constant *NumVariables = Builder.getInt32(NumReductions);
   const DataLayout &DL = Module->getDataLayout();
@@ -1256,9 +1235,8 @@ OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
     return Loc.IP;
 
   Directive OMPD = Directive::OMPD_master;
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
   Value *ThreadId = getOrCreateThreadID(Ident);
   Value *Args[] = {Ident, ThreadId};
 
@@ -1280,9 +1258,8 @@ OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
     return Loc.IP;
 
   Directive OMPD = Directive::OMPD_masked;
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
   Value *ThreadId = getOrCreateThreadID(Ident);
   Value *Args[] = {Ident, ThreadId, Filter};
   Value *ArgsEnd[] = {Ident, ThreadId};
@@ -1503,9 +1480,8 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
   Builder.restoreIP(CLI->getPreheaderIP());
   Builder.SetCurrentDebugLocation(DL);
 
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
-  Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
+  Value *SrcLoc = getOrCreateIdent(SrcLocStr);
 
   // Declare useful OpenMP runtime functions.
   Value *IV = CLI->getIndVar();
@@ -1632,9 +1608,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
   // Set up the source location value for OpenMP runtime.
   Builder.SetCurrentDebugLocation(DL);
 
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
-  Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(DL);
+  Value *SrcLoc = getOrCreateIdent(SrcLocStr);
 
   // Declare useful OpenMP runtime functions.
   Value *IV = CLI->getIndVar();
@@ -2404,9 +2379,8 @@ OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
   if (!updateToLocation(Loc))
     return Loc.IP;
 
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
   Value *ThreadId = getOrCreateThreadID(Ident);
 
   llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
@@ -2433,9 +2407,8 @@ OpenMPIRBuilder::createSingle(const LocationDescription &Loc,
   }
 
   Directive OMPD = Directive::OMPD_single;
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
   Value *ThreadId = getOrCreateThreadID(Ident);
   Value *Args[] = {Ident, ThreadId};
 
@@ -2463,9 +2436,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
     return Loc.IP;
 
   Directive OMPD = Directive::OMPD_critical;
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
   Value *ThreadId = getOrCreateThreadID(Ident);
   Value *LockVar = getOMPCriticalRegionLock(CriticalName);
   Value *Args[] = {Ident, ThreadId, LockVar};
@@ -2514,9 +2486,8 @@ OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc,
   Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
       ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
 
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
   Value *ThreadId = getOrCreateThreadID(Ident);
   Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
 
@@ -2541,9 +2512,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd(
   Instruction *ExitCall = nullptr;
 
   if (IsThreads) {
-    uint32_t SrcLocStrSize;
-    Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-    Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+    Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+    Value *Ident = getOrCreateIdent(SrcLocStr);
     Value *ThreadId = getOrCreateThreadID(Ident);
     Value *Args[] = {Ident, ThreadId};
 
@@ -2748,9 +2718,8 @@ CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
   IRBuilder<>::InsertPointGuard IPG(Builder);
   Builder.restoreIP(Loc.IP);
 
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
   Value *ThreadId = getOrCreateThreadID(Ident);
   Value *Args[] = {ThreadId, Size, Allocator};
 
@@ -2765,9 +2734,8 @@ CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
   IRBuilder<>::InsertPointGuard IPG(Builder);
   Builder.restoreIP(Loc.IP);
 
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
   Value *ThreadId = getOrCreateThreadID(Ident);
   Value *Args[] = {ThreadId, Addr, Allocator};
   Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
@@ -2780,9 +2748,8 @@ CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
   IRBuilder<>::InsertPointGuard IPG(Builder);
   Builder.restoreIP(Loc.IP);
 
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
   Value *ThreadId = getOrCreateThreadID(Ident);
   Constant *ThreadPrivateCache =
       getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
@@ -2800,9 +2767,8 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
   if (!updateToLocation(Loc))
     return Loc.IP;
 
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
   ConstantInt *IsSPMDVal = ConstantInt::getSigned(
       IntegerType::getInt8Ty(Int8->getContext()),
       IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
@@ -2854,9 +2820,8 @@ void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
   if (!updateToLocation(Loc))
     return;
 
-  uint32_t SrcLocStrSize;
-  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
+  Value *Ident = getOrCreateIdent(SrcLocStr);
   ConstantInt *IsSPMDVal = ConstantInt::getSigned(
       IntegerType::getInt8Ty(Int8->getContext()),
       IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);

diff  --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 464d74905cd00..9c6b0f6920fbb 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -1590,10 +1590,8 @@ struct OpenMPOpt {
             &F.getEntryBlock(), F.getEntryBlock().begin()));
       // Create a fallback location if non was found.
       // TODO: Use the debug locations of the calls instead.
-      uint32_t SrcLocStrSize;
-      Constant *Loc =
-          OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
-      Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc, SrcLocStrSize);
+      Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr();
+      Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc);
     }
     return Ident;
   }
@@ -3223,11 +3221,8 @@ struct AAKernelInfoFunction : AAKernelInfo {
       OpenMPIRBuilder::LocationDescription Loc(
           InsertPointTy(ParentBB, ParentBB->end()), DL);
       OMPInfoCache.OMPBuilder.updateToLocation(Loc);
-      uint32_t SrcLocStrSize;
-      auto *SrcLocStr =
-          OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc, SrcLocStrSize);
-      Value *Ident =
-          OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+      auto *SrcLocStr = OMPInfoCache.OMPBuilder.getOrCreateSrcLocStr(Loc);
+      Value *Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(SrcLocStr);
       BranchInst::Create(RegionCheckTidBB, ParentBB)->setDebugLoc(DL);
 
       // Add check for Tid in RegionCheckTidBB

diff  --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll
index 9228fa052e375..fbf554e3f9f67 100644
--- a/llvm/test/Transforms/OpenMP/deduplication.ll
+++ b/llvm/test/Transforms/OpenMP/deduplication.ll
@@ -19,6 +19,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16
 ; CHECK-DAG: @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
 ; CHECK-DAG: @.str1 = private unnamed_addr constant [23 x i8] c";file001;loc0001;0;0;;\00", align 1
 ; CHECK-DAG: @.str2 = private unnamed_addr constant [23 x i8] c";file002;loc0002;0;0;;\00", align 1
+; CHECK-DAG: @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8
 ; UTC_ARGS: --enable
 
 

diff  --git a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
index b72031a9b68c0..602d4f0be5779 100644
--- a/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
+++ b/llvm/test/Transforms/OpenMP/get_hardware_num_threads_in_block_fold.ll
@@ -13,7 +13,7 @@ target triple = "nvptx64"
 ; CHECK: @[[KERNEL1_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; CHECK: @[[KERNEL2_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
-; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
+; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
 ;.
 define weak void @kernel0() #0 {
 ; CHECK-LABEL: define {{[^@]+}}@kernel0

diff  --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
index b4ff9d9ffee6d..72af4cece3b42 100644
--- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
@@ -4694,10 +4694,10 @@ entry:
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4719,8 +4719,8 @@ entry:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
 ; CHECK1-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK1:       entry.split:
@@ -4753,7 +4753,7 @@ entry:
 ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1:[0-9]+]])
+; CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK1-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
 ; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
@@ -4837,10 +4837,10 @@ entry:
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4864,15 +4864,15 @@ entry:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
 ; CHECK1-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split:
 ; CHECK1-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
@@ -4893,7 +4893,7 @@ entry:
 ; CHECK1:       omp.par.merged.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK1:       omp_region.body.split:
-; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK1:       omp.par.outlined.exit.exitStub:
 ; CHECK1-NEXT:    ret void
@@ -4921,11 +4921,11 @@ entry:
 ; CHECK1-NEXT:    [[F_RELOADED:%.*]] = alloca float, align 4
 ; CHECK1-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
 ; CHECK1-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    store float [[F]], float* [[F_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4948,15 +4948,15 @@ entry:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
 ; CHECK1-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
 ; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split:
 ; CHECK1-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
@@ -4976,7 +4976,7 @@ entry:
 ; CHECK1:       omp.par.merged.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK1:       omp_region.body.split:
-; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK1:       omp.par.outlined.exit.exitStub:
 ; CHECK1-NEXT:    ret void
@@ -5006,10 +5006,10 @@ entry:
 ; CHECK1-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5033,15 +5033,15 @@ entry:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
 ; CHECK1-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split:
 ; CHECK1-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
@@ -5065,7 +5065,7 @@ entry:
 ; CHECK1:       omp.par.merged.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK1:       omp_region.body.split:
-; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK1:       omp.par.outlined.exit.exitStub:
 ; CHECK1-NEXT:    ret void
@@ -5092,10 +5092,10 @@ entry:
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5118,15 +5118,15 @@ entry:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
 ; CHECK1-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split:
 ; CHECK1-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
@@ -5150,7 +5150,7 @@ entry:
 ; CHECK1:       omp.par.merged.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK1:       omp_region.body.split:
-; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK1:       omp.par.outlined.exit.exitStub:
 ; CHECK1-NEXT:    ret void
@@ -5179,13 +5179,13 @@ entry:
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
 ; CHECK1-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
 ; CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5210,15 +5210,15 @@ entry:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
 ; CHECK1-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
 ; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split:
 ; CHECK1-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
@@ -5239,7 +5239,7 @@ entry:
 ; CHECK1:       omp.par.merged.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK1:       omp_region.body.split:
-; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK1:       omp.par.outlined.exit.exitStub:
 ; CHECK1-NEXT:    ret void
@@ -5270,10 +5270,10 @@ entry:
 ; CHECK1-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
 ; CHECK1-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5295,8 +5295,8 @@ entry:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
 ; CHECK1-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
 ; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK1:       entry.split:
@@ -5345,11 +5345,11 @@ entry:
 ; CHECK1-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
 ; CHECK1-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5372,15 +5372,15 @@ entry:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
 ; CHECK1-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
 ; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split:
 ; CHECK1-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
@@ -5401,7 +5401,7 @@ entry:
 ; CHECK1:       omp.par.merged.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK1:       omp_region.body.split:
-; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK1:       omp.par.outlined.exit.exitStub:
 ; CHECK1-NEXT:    ret void
@@ -5440,10 +5440,10 @@ entry:
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5465,11 +5465,11 @@ entry:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
 ; CHECK1-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK1-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK1:       entry.split:
@@ -5514,11 +5514,11 @@ entry:
 ; CHECK1-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5543,27 +5543,27 @@ entry:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
 ; CHECK1-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
 ; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split:
 ; CHECK1-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK1-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
 ; CHECK1-NEXT:    br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
 ; CHECK1:       omp_region.end4:
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split.split.split:
 ; CHECK1-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
@@ -5584,7 +5584,7 @@ entry:
 ; CHECK1:       omp.par.merged.split.split.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
 ; CHECK1:       omp_region.body5.split:
-; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK1-NEXT:    br label [[OMP_REGION_END4]]
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
@@ -5595,7 +5595,7 @@ entry:
 ; CHECK1:       omp.par.merged.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK1:       omp_region.body.split:
-; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK1:       omp.par.outlined.exit.exitStub:
 ; CHECK1-NEXT:    ret void
@@ -5741,10 +5741,10 @@ entry:
 ; CHECK1-NEXT:  entry:
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5768,8 +5768,8 @@ entry:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
 ; CHECK1-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK1-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK1:       entry.split:
@@ -5811,10 +5811,10 @@ entry:
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5836,8 +5836,8 @@ entry:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
 ; CHECK2-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
@@ -5870,7 +5870,7 @@ entry:
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1:[0-9]+]])
+; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK2-NEXT:    call void @__kmpc_push_proc_bind(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 noundef 3)
 ; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
@@ -5954,10 +5954,10 @@ entry:
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5981,15 +5981,15 @@ entry:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
 ; CHECK2-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
 ; CHECK2-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
@@ -6010,7 +6010,7 @@ entry:
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
@@ -6038,11 +6038,11 @@ entry:
 ; CHECK2-NEXT:    [[F_RELOADED:%.*]] = alloca float, align 4
 ; CHECK2-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
 ; CHECK2-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    store float [[F]], float* [[F_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6065,15 +6065,15 @@ entry:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
 ; CHECK2-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
 ; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
 ; CHECK2-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
@@ -6093,7 +6093,7 @@ entry:
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
@@ -6123,10 +6123,10 @@ entry:
 ; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6150,15 +6150,15 @@ entry:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
 ; CHECK2-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
 ; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
@@ -6182,7 +6182,7 @@ entry:
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
@@ -6209,10 +6209,10 @@ entry:
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6235,15 +6235,15 @@ entry:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
 ; CHECK2-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
 ; CHECK2-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
@@ -6267,7 +6267,7 @@ entry:
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
@@ -6296,13 +6296,13 @@ entry:
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
 ; CHECK2-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
 ; CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6327,15 +6327,15 @@ entry:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
 ; CHECK2-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
 ; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
 ; CHECK2-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
@@ -6356,7 +6356,7 @@ entry:
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
@@ -6387,10 +6387,10 @@ entry:
 ; CHECK2-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
 ; CHECK2-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6412,8 +6412,8 @@ entry:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
 ; CHECK2-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
@@ -6462,11 +6462,11 @@ entry:
 ; CHECK2-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
 ; CHECK2-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6489,15 +6489,15 @@ entry:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
 ; CHECK2-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
 ; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
 ; CHECK2-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
@@ -6518,7 +6518,7 @@ entry:
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
@@ -6557,10 +6557,10 @@ entry:
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6582,11 +6582,11 @@ entry:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
 ; CHECK2-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
@@ -6631,11 +6631,11 @@ entry:
 ; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6660,27 +6660,27 @@ entry:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
 ; CHECK2-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
 ; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
 ; CHECK2-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK2-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
 ; CHECK2-NEXT:    br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
 ; CHECK2:       omp_region.end4:
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split.split.split:
 ; CHECK2-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
@@ -6701,7 +6701,7 @@ entry:
 ; CHECK2:       omp.par.merged.split.split.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
 ; CHECK2:       omp_region.body5.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END4]]
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
@@ -6712,7 +6712,7 @@ entry:
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
 ; CHECK2:       omp_region.body.split:
-; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    br label [[OMP_REGION_END]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
@@ -6858,10 +6858,10 @@ entry:
 ; CHECK2-NEXT:  entry:
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6885,8 +6885,8 @@ entry:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
 ; CHECK2-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
 ; CHECK2-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:

diff  --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index 4a99013e01a99..8ecae7f5af9aa 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -32,7 +32,7 @@ define void @kernel() {
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true)
 ; CHECK-NEXT:    call void @foo() #[[ATTR4:[0-9]+]]
 ; CHECK-NEXT:    call void @bar() #[[ATTR4]]
-; CHECK-NEXT:    call void @unknown_no_openmp() #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    call void @unknown_no_openmp()
 ; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true)
 ; CHECK-NEXT:    ret void
 ;
@@ -41,7 +41,7 @@ define void @kernel() {
 ; CHECK-DISABLED-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* nonnull null, i8 1, i1 false, i1 true)
 ; CHECK-DISABLED-NEXT:    call void @foo() #[[ATTR4:[0-9]+]]
 ; CHECK-DISABLED-NEXT:    call void @bar() #[[ATTR4]]
-; CHECK-DISABLED-NEXT:    call void @unknown_no_openmp() #[[ATTR3:[0-9]+]]
+; CHECK-DISABLED-NEXT:    call void @unknown_no_openmp()
 ; CHECK-DISABLED-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* nonnull null, i8 1, i1 true)
 ; CHECK-DISABLED-NEXT:    ret void
 ;
@@ -183,14 +183,14 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
 ; CHECK: attributes #[[ATTR0]] = { nosync nounwind }
 ; CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
 ; CHECK: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly }
-; CHECK: attributes #[[ATTR3]] = { "llvm.assume"="omp_no_openmp" }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
 ; CHECK: attributes #[[ATTR4]] = { nounwind }
 ; CHECK: attributes #[[ATTR5]] = { nosync nounwind writeonly }
 ;.
 ; CHECK-DISABLED: attributes #[[ATTR0]] = { nosync nounwind }
 ; CHECK-DISABLED: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
 ; CHECK-DISABLED: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly }
-; CHECK-DISABLED: attributes #[[ATTR3]] = { "llvm.assume"="omp_no_openmp" }
+; CHECK-DISABLED: attributes #[[ATTR3:[0-9]+]] = { "llvm.assume"="omp_no_openmp" }
 ; CHECK-DISABLED: attributes #[[ATTR4]] = { nounwind }
 ; CHECK-DISABLED: attributes #[[ATTR5]] = { nosync nounwind writeonly }
 ;.

diff  --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
index 5051bce98279d..29e4d08e0c5f6 100644
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -113,7 +113,6 @@
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
-; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
 ; AMDGPU: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; AMDGPU: @[[X_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
@@ -127,7 +126,6 @@
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
-; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
 ; NVPTX: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; NVPTX: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
@@ -1439,7 +1437,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU:       region.guarded.end:
 ; AMDGPU-NEXT:    br label [[REGION_BARRIER]]
 ; AMDGPU:       region.barrier:
-; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; AMDGPU-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 ; AMDGPU-NEXT:    br label [[REGION_EXIT:%.*]]
 ; AMDGPU:       region.exit:
 ; AMDGPU-NEXT:    br label [[FOR_COND:%.*]]
@@ -1475,7 +1473,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; NVPTX:       region.guarded.end:
 ; NVPTX-NEXT:    br label [[REGION_BARRIER]]
 ; NVPTX:       region.barrier:
-; NVPTX-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]])
+; NVPTX-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]])
 ; NVPTX-NEXT:    br label [[REGION_EXIT:%.*]]
 ; NVPTX:       region.exit:
 ; NVPTX-NEXT:    br label [[FOR_COND:%.*]]

diff  --git a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
index 93065a263e67a..a0d43feecc3c0 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_assumes.ll
@@ -25,7 +25,6 @@ target triple = "nvptx64"
 ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
 ; CHECK: @[[__OMP_OFFLOADING_FD02_404433C2_MAIN_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_fd02_404433c2_main_l5_exec_mode], section "llvm.metadata"
-; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
 ;.
 define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8 dereferenceable(8) %x) local_unnamed_addr #0 {
 ; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_fd02_404433c2_main_l5
@@ -51,7 +50,7 @@ define weak void @__omp_offloading_fd02_404433c2_main_l5(double* nonnull align 8
 ; CHECK:       region.guarded.end:
 ; CHECK-NEXT:    br label [[REGION_BARRIER]]
 ; CHECK:       region.barrier:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
 ; CHECK-NEXT:    br label [[REGION_EXIT:%.*]]
 ; CHECK:       region.exit:
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [0 x i8*], [0 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0

diff  --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
index b2c4aca9d5e53..5d6334e7fa2b2 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -47,7 +47,6 @@ target triple = "nvptx64"
 ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
 ; CHECK: @[[__OMP_OFFLOADING_2A_FBFA7A_SEQUENTIAL_LOOP_L6_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x i8*] [i8* @__omp_offloading_2a_fbfa7a_sequential_loop_l6_exec_mode], section "llvm.metadata"
-; CHECK: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
 ;.
 ; CHECK-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
 ; CHECK-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
@@ -82,7 +81,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N)
 ; CHECK:       region.guarded.end:
 ; CHECK-NEXT:    br label [[REGION_BARRIER]]
 ; CHECK:       region.barrier:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
 ; CHECK-NEXT:    br label [[REGION_EXIT:%.*]]
 ; CHECK:       region.exit:
 ; CHECK-NEXT:    br label [[FOR_COND_I:%.*]]
@@ -106,7 +105,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N)
 ; CHECK:       region.guarded.end1:
 ; CHECK-NEXT:    br label [[REGION_BARRIER2]]
 ; CHECK:       region.barrier2:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
 ; CHECK-NEXT:    br label [[REGION_EXIT3]]
 ; CHECK:       region.exit3:
 ; CHECK-NEXT:    [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
@@ -127,7 +126,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N)
 ; CHECK:       region.guarded.end6:
 ; CHECK-NEXT:    br label [[REGION_BARRIER7]]
 ; CHECK:       region.barrier7:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]])
 ; CHECK-NEXT:    br label [[REGION_EXIT8:%.*]]
 ; CHECK:       region.exit8:
 ; CHECK-NEXT:    [[CALL8_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8
@@ -144,7 +143,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N)
 ; CHECK:       region.guarded.end11:
 ; CHECK-NEXT:    br label [[REGION_BARRIER12]]
 ; CHECK:       region.barrier12:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]])
 ; CHECK-NEXT:    br label [[REGION_EXIT13:%.*]]
 ; CHECK:       region.exit13:
 ; CHECK-NEXT:    [[CALL11_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8
@@ -161,7 +160,7 @@ define weak void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(i32* %x, i64 %N)
 ; CHECK:       region.guarded.end16:
 ; CHECK-NEXT:    br label [[REGION_BARRIER17]]
 ; CHECK:       region.barrier17:
-; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB2]], i32 [[TMP10]])
+; CHECK-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]])
 ; CHECK-NEXT:    br label [[REGION_EXIT18:%.*]]
 ; CHECK:       region.exit18:
 ; CHECK-NEXT:    [[CALL14_I:%.*]] = call i32 @no_openmp(i32* nonnull [[X]]) #[[ATTR7]], !noalias !8


        


More information about the llvm-commits mailing list