[llvm-branch-commits] [clang] [llvm] [OpenMP][clang] 6.0: num_threads strict (part 3: codegen) (PR #146405)
Robert Imschweiler via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Aug 1 07:07:56 PDT 2025
https://github.com/ro-i updated https://github.com/llvm/llvm-project/pull/146405
>From 16db9661eb53d8c50beba211065f0327694c24ba Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Fri, 27 Jun 2025 07:54:07 -0500
Subject: [PATCH 1/6] [OpenMP][clang] 6.0: num_threads strict (part 3: codegen)
OpenMP 6.0 12.1.2 specifies the behavior of the strict modifier for the
num_threads clause on parallel directives, along with the message and
severity clauses. This commit implements necessary codegen changes.
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 61 +-
clang/lib/CodeGen/CGOpenMPRuntime.h | 54 +-
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 61 +-
clang/lib/CodeGen/CGOpenMPRuntimeGPU.h | 26 +-
clang/lib/CodeGen/CGStmtOpenMP.cpp | 10 +-
...tx_target_parallel_num_threads_codegen.cpp | 738 +++++++-
.../OpenMP/parallel_num_threads_codegen.cpp | 33 +
.../target_parallel_num_threads_messages.cpp | 75 +-
...et_parallel_num_threads_strict_codegen.cpp | 1642 +++++++++++++++++
.../include/llvm/Frontend/OpenMP/OMPKinds.def | 12 +
10 files changed, 2609 insertions(+), 103 deletions(-)
create mode 100644 clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index ce2dd4d76368a..aadae117a5b85 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -1845,11 +1845,11 @@ void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
-void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
- llvm::Function *OutlinedFn,
- ArrayRef<llvm::Value *> CapturedVars,
- const Expr *IfCond,
- llvm::Value *NumThreads) {
+void CGOpenMPRuntime::emitParallelCall(
+ CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
+ llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
+ OpenMPSeverityClauseKind Severity, const Expr *Message) {
if (!CGF.HaveInsertPoint())
return;
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
@@ -2699,18 +2699,33 @@ llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
CGF.getContext().BoolTy, Loc);
}
-void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
- llvm::Value *NumThreads,
- SourceLocation Loc) {
+void CGOpenMPRuntime::emitNumThreadsClause(
+ CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
+ OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
+ const Expr *Message) {
if (!CGF.HaveInsertPoint())
return;
+ llvm::SmallVector<llvm::Value *, 4> Args(
+ {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
+ CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)});
// Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
- llvm::Value *Args[] = {
- emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
- CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_push_num_threads),
- Args);
+ // or __kmpc_push_num_threads_strict(&loc, global_tid, num_threads, severity,
+ // message) if strict modifier is used.
+ RuntimeFunction FnID = OMPRTL___kmpc_push_num_threads;
+ if (Modifier == OMPC_NUMTHREADS_strict) {
+ FnID = OMPRTL___kmpc_push_num_threads_strict;
+ // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is
+ // as if sev-level is fatal."
+ Args.push_back(llvm::ConstantInt::get(
+ CGM.Int32Ty, Severity == OMPC_SEVERITY_warning ? 1 : 2));
+ if (Message)
+ Args.push_back(CGF.EmitStringLiteralLValue(cast<StringLiteral>(Message))
+ .getPointer(CGF));
+ else
+ Args.push_back(llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
+ }
+ CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
}
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
@@ -12030,12 +12045,11 @@ llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
llvm_unreachable("Not supported in SIMD-only mode");
}
-void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
- SourceLocation Loc,
- llvm::Function *OutlinedFn,
- ArrayRef<llvm::Value *> CapturedVars,
- const Expr *IfCond,
- llvm::Value *NumThreads) {
+void CGOpenMPSIMDRuntime::emitParallelCall(
+ CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
+ llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
+ OpenMPSeverityClauseKind Severity, const Expr *Message) {
llvm_unreachable("Not supported in SIMD-only mode");
}
@@ -12138,9 +12152,10 @@ llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
llvm_unreachable("Not supported in SIMD-only mode");
}
-void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
- llvm::Value *NumThreads,
- SourceLocation Loc) {
+void CGOpenMPSIMDRuntime::emitNumThreadsClause(
+ CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
+ OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
+ const Expr *Message) {
llvm_unreachable("Not supported in SIMD-only mode");
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 5be48b439f4fd..8f97dd70116c3 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -777,11 +777,22 @@ class CGOpenMPRuntime {
/// specified, nullptr otherwise.
/// \param NumThreads The value corresponding to the num_threads clause, if
/// any, or nullptr.
+ /// \param NumThreadsModifier The modifier of the num_threads clause, if
+ /// any, ignored otherwise.
+ /// \param Severity The severity corresponding to the num_threads clause, if
+ /// any, ignored otherwise.
+ /// \param Message The message string corresponding to the num_threads clause,
+ /// if any, or nullptr.
///
- virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
- llvm::Function *OutlinedFn,
- ArrayRef<llvm::Value *> CapturedVars,
- const Expr *IfCond, llvm::Value *NumThreads);
+ virtual void
+ emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
+ llvm::Function *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
+ llvm::Value *NumThreads,
+ OpenMPNumThreadsClauseModifier NumThreadsModifier =
+ OMPC_NUMTHREADS_unknown,
+ OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal,
+ const Expr *Message = nullptr);
/// Emits a critical region.
/// \param CriticalName Name of the critical region.
@@ -1040,10 +1051,16 @@ class CGOpenMPRuntime {
/// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
/// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
/// clause.
+ /// If the modifier 'strict' is given:
+ /// Emits call to void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32
+ /// global_tid, kmp_int32 num_threads, int severity, const char *message) to
+ /// generate code for 'num_threads' clause with 'strict' modifier.
/// \param NumThreads An integer value of threads.
- virtual void emitNumThreadsClause(CodeGenFunction &CGF,
- llvm::Value *NumThreads,
- SourceLocation Loc);
+ virtual void emitNumThreadsClause(
+ CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
+ OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown,
+ OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal,
+ const Expr *Message = nullptr);
/// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
/// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
@@ -1737,11 +1754,21 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime {
/// specified, nullptr otherwise.
/// \param NumThreads The value corresponding to the num_threads clause, if
/// any, or nullptr.
+ /// \param NumThreadsModifier The modifier of the num_threads clause, if
+ /// any, ignored otherwise.
+ /// \param Severity The severity corresponding to the num_threads clause, if
+ /// any, ignored otherwise.
+ /// \param Message The message string corresponding to the num_threads clause,
+ /// if any, or nullptr.
///
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Function *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars,
- const Expr *IfCond, llvm::Value *NumThreads) override;
+ const Expr *IfCond, llvm::Value *NumThreads,
+ OpenMPNumThreadsClauseModifier NumThreadsModifier =
+ OMPC_NUMTHREADS_unknown,
+ OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal,
+ const Expr *Message = nullptr) override;
/// Emits a critical region.
/// \param CriticalName Name of the critical region.
@@ -1911,9 +1938,16 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime {
/// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
/// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
/// clause.
+ /// If the modifier 'strict' is given:
+ /// Emits call to void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32
+ /// global_tid, kmp_int32 num_threads, int severity, const char *message) to
+ /// generate code for 'num_threads' clause with 'strict' modifier.
/// \param NumThreads An integer value of threads.
- void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads,
- SourceLocation Loc) override;
+ void emitNumThreadsClause(
+ CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
+ OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown,
+ OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal,
+ const Expr *Message = nullptr) override;
/// Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32
/// global_tid, int proc_bind) to generate code for 'proc_bind' clause.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index e25b6948d30f8..7e4f47dc05329 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -899,9 +899,10 @@ void CGOpenMPRuntimeGPU::emitProcBindClause(CodeGenFunction &CGF,
// Nothing to do.
}
-void CGOpenMPRuntimeGPU::emitNumThreadsClause(CodeGenFunction &CGF,
- llvm::Value *NumThreads,
- SourceLocation Loc) {
+void CGOpenMPRuntimeGPU::emitNumThreadsClause(
+ CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
+ OpenMPNumThreadsClauseModifier Modifier, OpenMPSeverityClauseKind Severity,
+ const Expr *Message) {
// Nothing to do.
}
@@ -1201,18 +1202,17 @@ void CGOpenMPRuntimeGPU::emitTeamsCall(CodeGenFunction &CGF,
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
}
-void CGOpenMPRuntimeGPU::emitParallelCall(CodeGenFunction &CGF,
- SourceLocation Loc,
- llvm::Function *OutlinedFn,
- ArrayRef<llvm::Value *> CapturedVars,
- const Expr *IfCond,
- llvm::Value *NumThreads) {
+void CGOpenMPRuntimeGPU::emitParallelCall(
+ CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond,
+ llvm::Value *NumThreads, OpenMPNumThreadsClauseModifier NumThreadsModifier,
+ OpenMPSeverityClauseKind Severity, const Expr *Message) {
if (!CGF.HaveInsertPoint())
return;
- auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond,
- NumThreads](CodeGenFunction &CGF,
- PrePostActionTy &Action) {
+ auto &&ParallelGen = [this, Loc, OutlinedFn, CapturedVars, IfCond, NumThreads,
+ NumThreadsModifier, Severity, Message](
+ CodeGenFunction &CGF, PrePostActionTy &Action) {
CGBuilderTy &Bld = CGF.Builder;
llvm::Value *NumThreadsVal = NumThreads;
llvm::Function *WFn = WrapperFunctionsMap[OutlinedFn];
@@ -1260,21 +1260,30 @@ void CGOpenMPRuntimeGPU::emitParallelCall(CodeGenFunction &CGF,
NumThreadsVal = Bld.CreateZExtOrTrunc(NumThreadsVal, CGF.Int32Ty);
assert(IfCondVal && "Expected a value");
+ RuntimeFunction FnID = OMPRTL___kmpc_parallel_51;
llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
- llvm::Value *Args[] = {
- RTLoc,
- getThreadID(CGF, Loc),
- IfCondVal,
- NumThreadsVal,
- llvm::ConstantInt::get(CGF.Int32Ty, -1),
- FnPtr,
- ID,
- Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
- CGF.VoidPtrPtrTy),
- llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())};
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_parallel_51),
- Args);
+ llvm::SmallVector<llvm::Value *, 10> Args(
+ {RTLoc, getThreadID(CGF, Loc), IfCondVal, NumThreadsVal,
+ llvm::ConstantInt::get(CGF.Int32Ty, -1), FnPtr, ID,
+ Bld.CreateBitOrPointerCast(CapturedVarsAddrs.emitRawPointer(CGF),
+ CGF.VoidPtrPtrTy),
+ llvm::ConstantInt::get(CGM.SizeTy, CapturedVars.size())});
+ if (NumThreadsModifier == OMPC_NUMTHREADS_strict) {
+ FnID = OMPRTL___kmpc_parallel_60;
+ // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect
+ // is as if sev-level is fatal."
+ Args.append(
+ {llvm::ConstantInt::get(CGM.Int32Ty, true),
+ llvm::ConstantInt::get(CGM.Int32Ty,
+ Severity == OMPC_SEVERITY_warning ? 1 : 2)});
+ if (Message)
+ Args.push_back(CGF.EmitStringLiteralLValue(cast<StringLiteral>(Message))
+ .getPointer(CGF));
+ else
+ Args.push_back(llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
+ }
+ CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
};
RegionCodeGenTy RCG(ParallelGen);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
index b59f43a6915dd..3e367088a47f8 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -165,9 +165,16 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
/// Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
/// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
/// clause.
+ /// If the modifier 'strict' is given:
+ /// Emits call to void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32
+ /// global_tid, kmp_int32 num_threads, int severity, const char *message) to
+ /// generate code for 'num_threads' clause with 'strict' modifier.
/// \param NumThreads An integer value of threads.
- void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads,
- SourceLocation Loc) override;
+ void emitNumThreadsClause(
+ CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc,
+ OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown,
+ OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal,
+ const Expr *Message = nullptr) override;
/// This function ought to emit, in the general case, a call to
// the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
@@ -229,12 +236,21 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime {
/// \param IfCond Condition in the associated 'if' clause, if it was
/// specified, nullptr otherwise.
/// \param NumThreads The value corresponding to the num_threads clause, if
- /// any,
- /// or nullptr.
+ /// any, or nullptr.
+ /// \param NumThreadsModifier The modifier of the num_threads clause, if
+ /// any, ignored otherwise.
+ /// \param Severity The severity corresponding to the num_threads clause, if
+ /// any, ignored otherwise.
+ /// \param Message The message string corresponding to the num_threads clause,
+ /// if any, or nullptr.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
llvm::Function *OutlinedFn,
ArrayRef<llvm::Value *> CapturedVars,
- const Expr *IfCond, llvm::Value *NumThreads) override;
+ const Expr *IfCond, llvm::Value *NumThreads,
+ OpenMPNumThreadsClauseModifier NumThreadsModifier =
+ OMPC_NUMTHREADS_unknown,
+ OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal,
+ const Expr *Message = nullptr) override;
/// Emit an implicit/explicit barrier for OpenMP threads.
/// \param Kind Directive for which this implicit barrier call must be
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 5822e0f6db89a..ad7b941f92dd6 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1616,8 +1616,16 @@ static void emitCommonOMPParallelDirective(
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
/*IgnoreResultAssign=*/true);
+ OpenMPNumThreadsClauseModifier Modifier = NumThreadsClause->getModifier();
+ OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal;
+ clang::Expr *Message = nullptr;
+ if (const auto *MessageClause = S.getSingleClause<OMPMessageClause>())
+ Message = MessageClause->getMessageString();
+ if (const auto *SeverityClause = S.getSingleClause<OMPSeverityClause>())
+ Severity = SeverityClause->getSeverityKind();
CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
- CGF, NumThreads, NumThreadsClause->getBeginLoc());
+ CGF, NumThreads, NumThreadsClause->getBeginLoc(), Modifier, Severity,
+ Message);
}
if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
index f92ce4e89464b..7de49fed0ca65 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
@@ -1,10 +1,16 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
// Test target codegen - host bc file has to be created first.
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_1
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_2
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_2
+
+// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_1
+// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_2
+// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_2
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1
@@ -26,6 +32,12 @@ tx ftemplate(int n) {
{
aa += 1;
}
+ #ifdef OMP60
+ #pragma omp target parallel map(tofrom: aa) num_threads(strict: 1024)
+ {
+ aa += 1;
+ }
+ #endif
#pragma omp target parallel map(tofrom:a, aa, b) if(target: n>40) num_threads(n)
{
@@ -33,6 +45,14 @@ tx ftemplate(int n) {
aa += 1;
b[2] += 1;
}
+ #ifdef OMP60
+ #pragma omp target parallel map(tofrom:a, aa, b) if(target: n>40) num_threads(strict: n)
+ {
+ a += 1;
+ aa += 1;
+ b[2] += 1;
+ }
+ #endif
return a;
}
@@ -46,7 +66,655 @@ int bar(int n){
}
#endif
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25
+// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
+// OMP45_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
+// OMP45_1-NEXT: entry:
+// OMP45_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
+// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+// OMP45_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
+// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
+// OMP45_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
+// OMP45_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// OMP45_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP45_1: user_code.entry:
+// OMP45_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// OMP45_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// OMP45_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
+// OMP45_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// OMP45_1-NEXT: call void @__kmpc_target_deinit()
+// OMP45_1-NEXT: ret void
+// OMP45_1: worker.exit:
+// OMP45_1-NEXT: ret void
+//
+//
+// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
+// OMP45_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
+// OMP45_1-NEXT: entry:
+// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
+// OMP45_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP45_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
+// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]]
+// OMP45_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
+// OMP45_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// OMP45_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// OMP45_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// OMP45_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
+// OMP45_1-NEXT: ret void
+//
+//
+// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
+// OMP45_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
+// OMP45_1-NEXT: entry:
+// OMP45_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// OMP45_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
+// OMP45_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// OMP45_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
+// OMP45_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// OMP45_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
+// OMP45_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// OMP45_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META6]], !align [[META8:![0-9]+]]
+// OMP45_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]]
+// OMP45_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META6]], !align [[META8]]
+// OMP45_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]])
+// OMP45_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
+// OMP45_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP45_1: user_code.entry:
+// OMP45_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// OMP45_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// OMP45_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// OMP45_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
+// OMP45_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// OMP45_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
+// OMP45_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// OMP45_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
+// OMP45_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// OMP45_1-NEXT: call void @__kmpc_target_deinit()
+// OMP45_1-NEXT: ret void
+// OMP45_1: worker.exit:
+// OMP45_1-NEXT: ret void
+//
+//
+// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
+// OMP45_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// OMP45_1-NEXT: entry:
+// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// OMP45_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
+// OMP45_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// OMP45_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP45_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// OMP45_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
+// OMP45_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META6]], !align [[META8]]
+// OMP45_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]]
+// OMP45_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META6]], !align [[META8]]
+// OMP45_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
+// OMP45_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// OMP45_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// OMP45_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2
+// OMP45_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32
+// OMP45_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// OMP45_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// OMP45_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2
+// OMP45_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2
+// OMP45_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// OMP45_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1
+// OMP45_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// OMP45_1-NEXT: ret void
+//
+//
+// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
+// OMP45_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
+// OMP45_2-NEXT: entry:
+// OMP45_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
+// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
+// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
+// OMP45_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
+// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
+// OMP45_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
+// OMP45_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// OMP45_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP45_2: user_code.entry:
+// OMP45_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// OMP45_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// OMP45_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
+// OMP45_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// OMP45_2-NEXT: call void @__kmpc_target_deinit()
+// OMP45_2-NEXT: ret void
+// OMP45_2: worker.exit:
+// OMP45_2-NEXT: ret void
+//
+//
+// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
+// OMP45_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
+// OMP45_2-NEXT: entry:
+// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
+// OMP45_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// OMP45_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
+// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]]
+// OMP45_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
+// OMP45_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// OMP45_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// OMP45_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// OMP45_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
+// OMP45_2-NEXT: ret void
+//
+//
+// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
+// OMP45_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
+// OMP45_2-NEXT: entry:
+// OMP45_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
+// OMP45_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
+// OMP45_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// OMP45_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
+// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
+// OMP45_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// OMP45_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
+// OMP45_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// OMP45_2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META6]], !align [[META8:![0-9]+]]
+// OMP45_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]]
+// OMP45_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META6]], !align [[META8]]
+// OMP45_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]])
+// OMP45_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
+// OMP45_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP45_2: user_code.entry:
+// OMP45_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// OMP45_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// OMP45_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// OMP45_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4
+// OMP45_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// OMP45_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4
+// OMP45_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// OMP45_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4
+// OMP45_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// OMP45_2-NEXT: call void @__kmpc_target_deinit()
+// OMP45_2-NEXT: ret void
+// OMP45_2: worker.exit:
+// OMP45_2-NEXT: ret void
+//
+//
+// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
+// OMP45_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// OMP45_2-NEXT: entry:
+// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// OMP45_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
+// OMP45_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// OMP45_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// OMP45_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// OMP45_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
+// OMP45_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META6]], !align [[META8]]
+// OMP45_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]]
+// OMP45_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META6]], !align [[META8]]
+// OMP45_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
+// OMP45_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// OMP45_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// OMP45_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2
+// OMP45_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32
+// OMP45_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// OMP45_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// OMP45_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2
+// OMP45_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 2
+// OMP45_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// OMP45_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1
+// OMP45_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// OMP45_2-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
+// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
+// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_1: user_code.entry:
+// OMP60_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// OMP60_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
+// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// OMP60_1-NEXT: call void @__kmpc_target_deinit()
+// OMP60_1-NEXT: ret void
+// OMP60_1: worker.exit:
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
+// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
+// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// OMP60_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
+// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]])
+// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_1: user_code.entry:
+// OMP60_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// OMP60_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
+// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// OMP60_1-NEXT: call void @__kmpc_target_deinit()
+// OMP60_1-NEXT: ret void
+// OMP60_1: worker.exit:
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
+// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
+// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// OMP60_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
+// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
+// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// OMP60_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META8]], !align [[META10:![0-9]+]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]]
+// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META8]], !align [[META10]]
+// OMP60_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]])
+// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
+// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_1: user_code.entry:
+// OMP60_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// OMP60_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
+// OMP60_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
+// OMP60_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// OMP60_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
+// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// OMP60_1-NEXT: call void @__kmpc_target_deinit()
+// OMP60_1-NEXT: ret void
+// OMP60_1: worker.exit:
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
+// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META8]], !align [[META10]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]]
+// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META8]], !align [[META10]]
+// OMP60_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
+// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// OMP60_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// OMP60_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2
+// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32
+// OMP60_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// OMP60_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2
+// OMP60_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2
+// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// OMP60_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1
+// OMP60_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49
+// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
+// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// OMP60_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META8]], !align [[META10]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]]
+// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META8]], !align [[META10]]
+// OMP60_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_kernel_environment, ptr [[DYN_PTR]])
+// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
+// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_1: user_code.entry:
+// OMP60_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// OMP60_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
+// OMP60_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
+// OMP60_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// OMP60_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
+// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// OMP60_1-NEXT: call void @__kmpc_target_deinit()
+// OMP60_1-NEXT: ret void
+// OMP60_1: worker.exit:
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined
+// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
+// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META8]], !align [[META10]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META8]], !align [[META9]]
+// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META8]], !align [[META10]]
+// OMP60_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
+// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// OMP60_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// OMP60_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2
+// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32
+// OMP60_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// OMP60_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2
+// OMP60_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2
+// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// OMP60_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1
+// OMP60_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
+// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
+// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
+// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_2: user_code.entry:
+// OMP60_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// OMP60_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
+// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// OMP60_2-NEXT: call void @__kmpc_target_deinit()
+// OMP60_2-NEXT: ret void
+// OMP60_2: worker.exit:
+// OMP60_2-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
+// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
+// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// OMP60_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
+// OMP60_2-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
+// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
+// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]])
+// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_2: user_code.entry:
+// OMP60_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// OMP60_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
+// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// OMP60_2-NEXT: call void @__kmpc_target_deinit()
+// OMP60_2-NEXT: ret void
+// OMP60_2: worker.exit:
+// OMP60_2-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
+// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
+// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// OMP60_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
+// OMP60_2-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
+// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
+// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
+// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// OMP60_2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META8]], !align [[META10:![0-9]+]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]]
+// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META8]], !align [[META10]]
+// OMP60_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]])
+// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
+// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_2: user_code.entry:
+// OMP60_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// OMP60_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4
+// OMP60_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4
+// OMP60_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// OMP60_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4
+// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// OMP60_2-NEXT: call void @__kmpc_target_deinit()
+// OMP60_2-NEXT: ret void
+// OMP60_2: worker.exit:
+// OMP60_2-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
+// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META8]], !align [[META10]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]]
+// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META8]], !align [[META10]]
+// OMP60_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
+// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// OMP60_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// OMP60_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2
+// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32
+// OMP60_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// OMP60_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2
+// OMP60_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 2
+// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// OMP60_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1
+// OMP60_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// OMP60_2-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49
+// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
+// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
+// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// OMP60_2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META8]], !align [[META10]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]]
+// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META8]], !align [[META10]]
+// OMP60_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_kernel_environment, ptr [[DYN_PTR]])
+// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
+// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_2: user_code.entry:
+// OMP60_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// OMP60_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4
+// OMP60_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4
+// OMP60_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// OMP60_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4
+// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// OMP60_2-NEXT: call void @__kmpc_target_deinit()
+// OMP60_2-NEXT: ret void
+// OMP60_2: worker.exit:
+// OMP60_2-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined
+// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
+// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META8]], !align [[META10]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META8]], !align [[META9]]
+// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META8]], !align [[META10]]
+// OMP60_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
+// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// OMP60_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// OMP60_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2
+// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32
+// OMP60_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// OMP60_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2
+// OMP60_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i32 0, i32 2
+// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// OMP60_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1
+// OMP60_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// OMP60_2-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
@@ -54,22 +722,22 @@ int bar(int n){
// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment, ptr [[DYN_PTR]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -78,7 +746,7 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]]
// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
@@ -87,7 +755,7 @@ int bar(int n){
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
@@ -101,10 +769,10 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment, ptr [[DYN_PTR]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META6]], !align [[META8:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META6]], !align [[META8]]
+// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
@@ -116,14 +784,14 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
// CHECK1-NEXT: ret void
//
//
-// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -136,9 +804,9 @@ int bar(int n){
// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !nonnull [[META6]], !align [[META8]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8, !nonnull [[META6]], !align [[META7]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META6]], !align [[META8]]
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
@@ -154,7 +822,7 @@ int bar(int n){
// CHECK1-NEXT: ret void
//
//
-// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25
+// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
@@ -162,22 +830,22 @@ int bar(int n){
// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment, ptr [[DYN_PTR]])
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
//
//
-// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined
+// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
@@ -186,7 +854,7 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]]
// CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
@@ -195,7 +863,7 @@ int bar(int n){
// CHECK2-NEXT: ret void
//
//
-// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30
+// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
@@ -209,10 +877,10 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment, ptr [[DYN_PTR]])
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META6]], !align [[META8:![0-9]+]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]]
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META6]], !align [[META8]]
+// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
@@ -224,14 +892,14 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
// CHECK2-NEXT: ret void
//
//
-// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined
+// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
@@ -244,9 +912,9 @@ int bar(int n){
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META6]], !align [[META8]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4, !nonnull [[META6]], !align [[META7]]
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META6]], !align [[META8]]
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
diff --git a/clang/test/OpenMP/parallel_num_threads_codegen.cpp b/clang/test/OpenMP/parallel_num_threads_codegen.cpp
index de10bead4ff81..2fb9589ab1ab6 100644
--- a/clang/test/OpenMP/parallel_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/parallel_num_threads_codegen.cpp
@@ -2,9 +2,18 @@
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefixes=CHECK,OMP60 %s
+// RUN: %clang_cc1 -DOMP60 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -DOMP60 -fopenmp -fopenmp-version=60 -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefixes=CHECK,OMP60 %s
+
// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+
+// RUN: %clang_cc1 -DOMP60 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple %itanium_abi_triple -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DOMP60 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -DOMP60 -fopenmp-simd -fopenmp-version=60 -x c++ -triple %itanium_abi_triple -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+
// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
// expected-no-diagnostics
#ifndef HEADER
@@ -32,6 +41,12 @@ int tmain() {
foo();
#pragma omp parallel num_threads(T(23))
foo();
+#ifdef OMP60
+#pragma omp parallel num_threads(strict: C)
+ foo();
+#pragma omp parallel num_threads(strict: T(23))
+ foo();
+#endif
return 0;
}
@@ -42,6 +57,12 @@ int main() {
foo();
#pragma omp parallel num_threads(a)
foo();
+#ifdef OMP60
+#pragma omp parallel num_threads(strict: 2)
+ foo();
+#pragma omp parallel num_threads(strict: a)
+ foo();
+#endif
return a + tmain<char, 5>() + tmain<S, 1>();
}
@@ -70,6 +91,10 @@ int main() {
// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
// CHECK: call {{.*}}void @__kmpc_push_num_threads(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 23)
// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
+// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 5, i32 2, ptr null)
+// OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call(
+// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 23, i32 2, ptr null)
+// OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call(
// CHECK: ret [[INT_TY]] 0
// CHECK-NEXT: }
@@ -83,6 +108,14 @@ int main() {
// CHECK: call {{.*}}void @__kmpc_push_num_threads(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]])
// CHECK: {{(invoke|call)}} {{.*}} [[S_TY_DESTR]](ptr {{[^,]*}} [[S_TEMP]])
// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
+// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 1, i32 2, ptr null)
+// OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call(
+// OMP60: {{(invoke|call)}} {{.*}} [[S_TY_CONSTR]](ptr {{[^,]*}} [[S_TEMP:%.+]], [[INTPTR_T_TY]] noundef [[INTPTR_T_TY_ATTR]]23)
+// OMP60: [[S_CHAR_OP1:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP]](ptr {{[^,]*}} [[S_TEMP]])
+// OMP60: [[RES1:%.+]] = sext {{.*}}i8 [[S_CHAR_OP1]] to i32
+// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES1]], i32 2, ptr null)
+// OMP60: {{(invoke|call)}} {{.*}} [[S_TY_DESTR]](ptr {{[^,]*}} [[S_TEMP]])
+// OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call(
// CHECK: ret [[INT_TY]] 0
// CHECK: }
diff --git a/clang/test/OpenMP/target_parallel_num_threads_messages.cpp b/clang/test/OpenMP/target_parallel_num_threads_messages.cpp
index 79f77b7dc9f91..7d487c8a4da33 100644
--- a/clang/test/OpenMP/target_parallel_num_threads_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_num_threads_messages.cpp
@@ -1,7 +1,9 @@
// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wuninitialized
-
// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wuninitialized
+// RUN: %clang_cc1 -DOMP60 -verify=expected,omp60 -fopenmp -fopenmp-version=60 -ferror-limit 100 %s -Wuninitialized
+// RUN: %clang_cc1 -DOMP60 -verify=expected,omp60 -fopenmp-simd -fopenmp-version=60 -ferror-limit 100 %s -Wuninitialized
+
void foo() {
}
@@ -9,11 +11,11 @@ bool foobool(int argc) {
return argc;
}
-struct S1; // expected-note {{declared here}}
+struct S1; // expected-note {{declared here}} omp60-note {{declared here}}
#define redef_num_threads(a, b) num_threads(a)
-template <class T, typename S, int N> // expected-note {{declared here}}
+template <class T, typename S, int N> // expected-note {{declared here}} omp60-note {{declared here}}
T tmain(T argc, S **argv) {
T z;
#pragma omp target parallel num_threads // expected-error {{expected '(' after 'num_threads'}}
@@ -41,6 +43,40 @@ T tmain(T argc, S **argv) {
#pragma omp target parallel redef_num_threads (argc, argc)
foo();
+#ifdef OMP60
+ // Valid uses of strict modifier
+ #pragma omp parallel num_threads(strict: 4)
+ #pragma omp parallel num_threads(strict: argc+z)
+
+ // Invalid: missing expression after strict:
+ #pragma omp parallel num_threads(strict: ) // omp60-error {{expected expression}}
+ #pragma omp parallel num_threads(strict:) // omp60-error {{expected expression}}
+ #pragma omp parallel num_threads(strict: // omp60-error {{expected expression}} omp60-error {{expected ')'}} omp60-note {{to match this '('}}
+
+ // Invalid: unknown/missing modifier
+ #pragma omp parallel num_threads(foo: 4) // omp60-error {{expected 'strict' in OpenMP clause 'num_threads'}}
+ #pragma omp parallel num_threads(: 4) // omp60-error {{expected expression}} omp60-error {{expected ')'}} omp60-note {{to match this '('}}
+ #pragma omp parallel num_threads(:) // omp60-error {{expected expression}} omp60-error {{expected ')'}} omp60-note {{to match this '('}}
+
+ // Invalid: missing colon after modifier
+ #pragma omp parallel num_threads(strict 4) // omp60-error {{missing ':' after strict modifier}}
+
+ // Invalid: negative, zero, or non-integral
+ #pragma omp parallel num_threads(strict: -1) // omp60-error {{argument to 'num_threads' clause must be a strictly positive integer value}}
+ #pragma omp parallel num_threads(strict: 0) // omp60-error {{argument to 'num_threads' clause must be a strictly positive integer value}}
+ #pragma omp parallel num_threads(strict: (argc > 0) ? argv[1] : argv[2]) // omp60-error 2 {{expression must have integral or unscoped enumeration type, not 'char *'}}
+ #pragma omp parallel num_threads(strict: S) // omp60-error {{'S' does not refer to a value}}
+ #pragma omp parallel num_threads(strict: argv[1]=2) // omp60-error {{expected ')'}} omp60-note {{to match this '('}} omp60-error 2 {{expression must have integral or unscoped enumeration type, not 'char *'}}
+ #pragma omp parallel num_threads(strict: N) // omp60-error {{argument to 'num_threads' clause must be a strictly positive integer value}}
+
+ // Invalid: multiple strict modifiers or mixed with non-strict
+ #pragma omp parallel num_threads(strict: 4, strict: 5) // omp60-error {{expected ')'}} omp60-note {{to match this '('}}
+ #pragma omp parallel num_threads(strict: 4), num_threads(5) // omp60-error {{directive '#pragma omp parallel' cannot contain more than one 'num_threads' clause}}
+ #pragma omp parallel num_threads(4), num_threads(strict: 5) // omp60-error {{directive '#pragma omp parallel' cannot contain more than one 'num_threads' clause}}
+#endif // OMP60
+
+ foo();
+
return argc;
}
@@ -69,5 +105,38 @@ int main(int argc, char **argv) {
#pragma omp target parallel redef_num_threads (argc, argc)
foo();
+#ifdef OMP60
+ // Valid uses of strict modifier
+ #pragma omp parallel num_threads(strict: 4)
+ #pragma omp parallel num_threads(strict: argc+z)
+
+ // Invalid: missing expression after strict:
+ #pragma omp parallel num_threads(strict: ) // omp60-error {{expected expression}}
+ #pragma omp parallel num_threads(strict:) // omp60-error {{expected expression}}
+ #pragma omp parallel num_threads(strict: // omp60-error {{expected expression}} omp60-error {{expected ')'}} omp60-note {{to match this '('}}
+
+ // Invalid: unknown/missing modifier
+ #pragma omp parallel num_threads(foo: 4) // omp60-error {{expected 'strict' in OpenMP clause 'num_threads'}}
+ #pragma omp parallel num_threads(: 4) // omp60-error {{expected expression}} omp60-error {{expected ')'}} omp60-note {{to match this '('}}
+ #pragma omp parallel num_threads(:) // omp60-error {{expected expression}} omp60-error {{expected ')'}} omp60-note {{to match this '('}}
+
+ // Invalid: missing colon after modifier
+ #pragma omp parallel num_threads(strict 4) // omp60-error {{missing ':' after strict modifier}}
+
+ // Invalid: negative, zero, or non-integral
+ #pragma omp parallel num_threads(strict: -1) // omp60-error {{argument to 'num_threads' clause must be a strictly positive integer value}}
+ #pragma omp parallel num_threads(strict: 0) // omp60-error {{argument to 'num_threads' clause must be a strictly positive integer value}}
+ #pragma omp parallel num_threads(strict: (argc > 0) ? argv[1] : argv[2]) // omp60-error {{expression must have integral or unscoped enumeration type, not 'char *'}}
+ #pragma omp parallel num_threads(strict: S1) // omp60-error {{'S1' does not refer to a value}}
+ #pragma omp parallel num_threads(strict: argv[1]=2) // omp60-error {{expected ')'}} omp60-note {{to match this '('}} omp60-error {{expression must have integral or unscoped enumeration type, not 'char *'}}
+
+ // Invalid: multiple strict modifiers or mixed with non-strict
+ #pragma omp parallel num_threads(strict: 4, strict: 5) // omp60-error {{expected ')'}} omp60-note {{to match this '('}}
+ #pragma omp parallel num_threads(strict: 4), num_threads(5) // omp60-error {{directive '#pragma omp parallel' cannot contain more than one 'num_threads' clause}}
+ #pragma omp parallel num_threads(4), num_threads(strict: 5) // omp60-error {{directive '#pragma omp parallel' cannot contain more than one 'num_threads' clause}}
+#endif // OMP60
+
+ foo();
+
return tmain<int, char, 3>(argc, argv); // expected-note {{in instantiation of function template specialization 'tmain<int, char, 3>' requested here}}
}
diff --git a/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp
new file mode 100644
index 0000000000000..3220e61c91fdf
--- /dev/null
+++ b/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp
@@ -0,0 +1,1642 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK1
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK3
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK3
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK9
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK9
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK11
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK11
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=60 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+
+// We have 6 target regions
+
+// Check that target registration is performed via a Ctor.
+
+// Check that the offloading functions are emitted and that the parallel function
+// is appropriately guarded.
+
+
+template<typename tx> // tx is the value type; bar() instantiates this as ftemplate<int>.
+tx ftemplate(int n) { // Two strict num_threads regions: a constant argument and a 'short' variable. n is unused.
+ tx a = 0;
+
+ #pragma omp target parallel num_threads(strict: tx(20))
+ { // Empty region body; the clause argument is the constant tx(20).
+ }
+
+ short b = 1;
+ #pragma omp target parallel num_threads(strict: b)
+ { // b is both the clause argument and an operand of the region body.
+ a += b;
+ }
+
+ return a;
+}
+
+static // File-local function; called from bar().
+int fstatic(int n) { // Two empty regions whose strict thread counts are computed from the parameter n.
+
+ #pragma omp target parallel num_threads(strict: n)
+ { // Clause argument is the parameter itself.
+ }
+
+ #pragma omp target parallel num_threads(strict: 32+n)
+ { // Clause argument is an arithmetic expression over the parameter.
+ }
+
+ return n+1;
+}
+
+struct S1 { // Covers strict num_threads inside a member function that writes a field through 'this'.
+ double a; // Assigned inside both target regions of r1().
+
+ int r1(int n){ // Runs two target parallel regions, then returns the field converted to int.
+ int b = 1;
+
+ #pragma omp target parallel num_threads(strict: n-b)
+ { // Generated assertions name this kernel "..._l86", presumably after the pragma's line: keep line positions stable or regenerate.
+ this->a = (double)b + 1.5;
+ }
+
+ #pragma omp target parallel num_threads(strict: 1024)
+ { // Constant thread count; the body only writes this->a.
+ this->a = 2.5;
+ }
+
+ return (int)a;
+ }
+};
+
+int bar(int n){ // Calls each function that contains target regions and sums their results.
+ int a = 0;
+
+ S1 S;
+ a += S.r1(n); // Member-function case.
+
+ a += fstatic(n); // File-local function case.
+
+ a += ftemplate<int>(n); // Template case, instantiated with tx = int.
+
+ return a;
+}
+
+#endif
+// CHECK1-LABEL: define {{[^@]+}}@_Z3bari
+// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
+// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[A]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP0]])
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CALL]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP2]])
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP3]], [[CALL1]]
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP4]])
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4
+// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP5]], [[CALL3]]
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[A]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
+// CHECK1-NEXT: ret i32 [[TMP6]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
+// CHECK1-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: store i32 1, ptr [[B]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4
+// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP0]], [[TMP1]]
+// CHECK1-NEXT: store i32 [[SUB]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[B_CASTED]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[B_CASTED]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[A]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK1-NEXT: store i64 [[TMP3]], ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK1-NEXT: store i64 [[TMP5]], ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK1-NEXT: store ptr null, ptr [[TMP14]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP17]], 0
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 3, ptr [[TMP19]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 3, ptr [[TMP20]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP21]], align 8
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP22]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes, ptr [[TMP23]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes, ptr [[TMP24]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP25]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP26]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP27]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK1-NEXT: store i64 0, ptr [[TMP28]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP29]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK1-NEXT: store [3 x i32] [[TMP18]], ptr [[TMP30]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK1-NEXT: store i32 0, ptr [[TMP31]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 1, i32 [[TMP17]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
+// CHECK1-NEXT: br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1: omp_offload.failed:
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86(ptr [[THIS1]], i64 [[TMP3]], i64 [[TMP5]]) #[[ATTR2:[0-9]+]]
+// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK1: omp_offload.cont:
+// CHECK1-NEXT: [[A2:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[THIS1]], ptr [[TMP34]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[A2]], ptr [[TMP35]], align 8
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP36]], align 8
+// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK1-NEXT: store i32 3, ptr [[TMP39]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP40]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP37]], ptr [[TMP41]], align 8
+// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP38]], ptr [[TMP42]], align 8
+// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.1, ptr [[TMP43]], align 8
+// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP44]], align 8
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP45]], align 8
+// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP46]], align 8
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP47]], align 8
+// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
+// CHECK1-NEXT: store i64 0, ptr [[TMP48]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
+// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP49]], align 4
+// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
+// CHECK1-NEXT: store [3 x i32] [i32 1024, i32 0, i32 0], ptr [[TMP50]], align 4
+// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
+// CHECK1-NEXT: store i32 0, ptr [[TMP51]], align 4
+// CHECK1-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1024, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK1-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
+// CHECK1-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK1: omp_offload.failed7:
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91(ptr [[THIS1]]) #[[ATTR2]]
+// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]]
+// CHECK1: omp_offload.cont8:
+// CHECK1-NEXT: [[A9:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP54:%.*]] = load double, ptr [[A9]], align 8
+// CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[TMP54]] to i32
+// CHECK1-NEXT: ret i32 [[CONV]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZL7fstatici
+// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP2]], ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP8]], 0
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 3, ptr [[TMP10]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP11]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.3, ptr [[TMP14]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP15]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP17]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK1-NEXT: store i64 0, ptr [[TMP19]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP20]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK1-NEXT: store [3 x i32] [[TMP9]], ptr [[TMP21]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK1-NEXT: store i32 0, ptr [[TMP22]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP8]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+// CHECK1-NEXT: br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1: omp_offload.failed:
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69(i64 [[TMP2]]) #[[ATTR2]]
+// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK1: omp_offload.cont:
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 32, [[TMP25]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4
+// CHECK1-NEXT: [[TMP27:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED2]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP27]], ptr [[TMP28]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP27]], ptr [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP33]], 0
+// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK1-NEXT: store i32 3, ptr [[TMP35]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK1-NEXT: store i32 1, ptr [[TMP36]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP37]], align 8
+// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP32]], ptr [[TMP38]], align 8
+// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.5, ptr [[TMP39]], align 8
+// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP40]], align 8
+// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP41]], align 8
+// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8
+// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP43]], align 8
+// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
+// CHECK1-NEXT: store i64 0, ptr [[TMP44]], align 8
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
+// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP45]], align 4
+// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
+// CHECK1-NEXT: store [3 x i32] [[TMP34]], ptr [[TMP46]], align 4
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
+// CHECK1-NEXT: store i32 0, ptr [[TMP47]], align 4
+// CHECK1-NEXT: [[TMP48:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP33]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK1-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0
+// CHECK1-NEXT: br i1 [[TMP49]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK1: omp_offload.failed7:
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73(i64 [[TMP27]]) #[[ATTR2]]
+// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT8]]
+// CHECK1: omp_offload.cont8:
+// CHECK1-NEXT: [[TMP50:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP50]], 1
+// CHECK1-NEXT: ret i32 [[ADD9]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
+// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT: [[B:%.*]] = alloca i16, align 2
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i16, align 2
+// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK1-NEXT: [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK1-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[A]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 3, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 0, ptr [[TMP1]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store ptr null, ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store ptr null, ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store ptr null, ptr [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store ptr null, ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK1-NEXT: store i64 0, ptr [[TMP9]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP10]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK1-NEXT: store [3 x i32] [i32 20, i32 0, i32 0], ptr [[TMP11]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK1-NEXT: store i32 0, ptr [[TMP12]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 20, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.region_id, ptr [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
+// CHECK1-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1: omp_offload.failed:
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53() #[[ATTR2]]
+// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK1: omp_offload.cont:
+// CHECK1-NEXT: store i16 1, ptr [[B]], align 2
+// CHECK1-NEXT: [[TMP15:%.*]] = load i16, ptr [[B]], align 2
+// CHECK1-NEXT: store i16 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 2
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4
+// CHECK1-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[A_CASTED]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = load i16, ptr [[B]], align 2
+// CHECK1-NEXT: store i16 [[TMP18]], ptr [[B_CASTED]], align 2
+// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[B_CASTED]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2
+// CHECK1-NEXT: store i16 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 2
+// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP22]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: store i64 [[TMP17]], ptr [[TMP23]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr null, ptr [[TMP24]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK1-NEXT: store i64 [[TMP19]], ptr [[TMP25]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK1-NEXT: store i64 [[TMP19]], ptr [[TMP26]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr null, ptr [[TMP27]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP28]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK1-NEXT: store i64 [[TMP21]], ptr [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK1-NEXT: store ptr null, ptr [[TMP30]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP33:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2
+// CHECK1-NEXT: [[TMP34:%.*]] = zext i16 [[TMP33]] to i32
+// CHECK1-NEXT: [[TMP35:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP34]], 0
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 0
+// CHECK1-NEXT: store i32 3, ptr [[TMP36]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 1
+// CHECK1-NEXT: store i32 3, ptr [[TMP37]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 2
+// CHECK1-NEXT: store ptr [[TMP31]], ptr [[TMP38]], align 8
+// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 3
+// CHECK1-NEXT: store ptr [[TMP32]], ptr [[TMP39]], align 8
+// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 4
+// CHECK1-NEXT: store ptr @.offload_sizes.7, ptr [[TMP40]], align 8
+// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 5
+// CHECK1-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP41]], align 8
+// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 6
+// CHECK1-NEXT: store ptr null, ptr [[TMP42]], align 8
+// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 7
+// CHECK1-NEXT: store ptr null, ptr [[TMP43]], align 8
+// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, ptr [[TMP44]], align 8
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 9
+// CHECK1-NEXT: store i64 0, ptr [[TMP45]], align 8
+// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 10
+// CHECK1-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP46]], align 4
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 11
+// CHECK1-NEXT: store [3 x i32] [[TMP35]], ptr [[TMP47]], align 4
+// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 12
+// CHECK1-NEXT: store i32 0, ptr [[TMP48]], align 4
+// CHECK1-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP34]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.region_id, ptr [[KERNEL_ARGS1]])
+// CHECK1-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
+// CHECK1-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
+// CHECK1: omp_offload.failed2:
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58(i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]]) #[[ATTR2]]
+// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT3]]
+// CHECK1: omp_offload.cont3:
+// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[A]], align 4
+// CHECK1-NEXT: ret i32 [[TMP51]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86
+// CHECK1-SAME: (ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null)
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[B_CASTED]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined, ptr [[TMP1]], i64 [[TMP4]])
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
+// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
+// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91
+// CHECK1-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 2, ptr null)
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined, ptr [[TMP1]])
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: store double 2.500000e+00, ptr [[A]], align 8
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69
+// CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null)
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined)
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73
+// CHECK1-SAME: (i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null)
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined)
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53
+// CHECK1-SAME: () #[[ATTR1]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 2, ptr null)
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined)
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58
+// CHECK1-SAME: (i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2
+// CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32
+// CHECK1-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null)
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2
+// CHECK1-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8
+// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined, i64 [[TMP4]], i64 [[TMP6]])
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2
+// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@_Z3bari
+// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[A]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP0]])
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CALL]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK3-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP2]])
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4
+// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP3]], [[CALL1]]
+// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK3-NEXT: [[CALL3:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP4]])
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4
+// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP5]], [[CALL3]]
+// CHECK3-NEXT: store i32 [[ADD4]], ptr [[A]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4
+// CHECK3-NEXT: ret i32 [[TMP6]]
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
+// CHECK3-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[B]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4
+// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP0]], [[TMP1]]
+// CHECK3-NEXT: store i32 [[SUB]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[B]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[B_CASTED]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_CASTED]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[A]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 [[TMP3]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[TMP13]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr null, ptr [[TMP14]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP17]], 0
+// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 3, ptr [[TMP19]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 3, ptr [[TMP20]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP15]], ptr [[TMP21]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP16]], ptr [[TMP22]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes, ptr [[TMP23]], align 4
+// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes, ptr [[TMP24]], align 4
+// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP25]], align 4
+// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP26]], align 4
+// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, ptr [[TMP27]], align 8
+// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK3-NEXT: store i64 0, ptr [[TMP28]], align 8
+// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP29]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK3-NEXT: store [3 x i32] [[TMP18]], ptr [[TMP30]], align 4
+// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK3-NEXT: store i32 0, ptr [[TMP31]], align 4
+// CHECK3-NEXT: [[TMP32:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 1, i32 [[TMP17]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP33:%.*]] = icmp ne i32 [[TMP32]], 0
+// CHECK3-NEXT: br i1 [[TMP33]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3: omp_offload.failed:
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86(ptr [[THIS1]], i32 [[TMP3]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]]
+// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK3: omp_offload.cont:
+// CHECK3-NEXT: [[A2:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[THIS1]], ptr [[TMP34]], align 4
+// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[A2]], ptr [[TMP35]], align 4
+// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP36]], align 4
+// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK3-NEXT: store i32 3, ptr [[TMP39]], align 4
+// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP40]], align 4
+// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP37]], ptr [[TMP41]], align 4
+// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP38]], ptr [[TMP42]], align 4
+// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes.1, ptr [[TMP43]], align 4
+// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP44]], align 4
+// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP45]], align 4
+// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP46]], align 4
+// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, ptr [[TMP47]], align 8
+// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
+// CHECK3-NEXT: store i64 0, ptr [[TMP48]], align 8
+// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
+// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP49]], align 4
+// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
+// CHECK3-NEXT: store [3 x i32] [i32 1024, i32 0, i32 0], ptr [[TMP50]], align 4
+// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
+// CHECK3-NEXT: store i32 0, ptr [[TMP51]], align 4
+// CHECK3-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 1024, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK3-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
+// CHECK3-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK3: omp_offload.failed7:
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91(ptr [[THIS1]]) #[[ATTR2]]
+// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT8]]
+// CHECK3: omp_offload.cont8:
+// CHECK3-NEXT: [[A9:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS1]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP54:%.*]] = load double, ptr [[A9]], align 4
+// CHECK3-NEXT: [[CONV:%.*]] = fptosi double [[TMP54]] to i32
+// CHECK3-NEXT: ret i32 [[CONV]]
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@_ZL7fstatici
+// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED2:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_PTRS4:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [1 x ptr], align 4
+// CHECK3-NEXT: [[KERNEL_ARGS6:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP8]], 0
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 3, ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP6]], ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP13]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes.3, ptr [[TMP14]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP15]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP16]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP17]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, ptr [[TMP18]], align 8
+// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK3-NEXT: store i64 0, ptr [[TMP19]], align 8
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP20]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK3-NEXT: store [3 x i32] [[TMP9]], ptr [[TMP21]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK3-NEXT: store i32 0, ptr [[TMP22]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP8]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
+// CHECK3-NEXT: br i1 [[TMP24]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3: omp_offload.failed:
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69(i32 [[TMP2]]) #[[ATTR2]]
+// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK3: omp_offload.cont:
+// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 32, [[TMP25]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4
+// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED2]], align 4
+// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP28]], align 4
+// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK3-NEXT: store i32 [[TMP27]], ptr [[TMP29]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS5]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP30]], align 4
+// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP33]], 0
+// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0
+// CHECK3-NEXT: store i32 3, ptr [[TMP35]], align 4
+// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1
+// CHECK3-NEXT: store i32 1, ptr [[TMP36]], align 4
+// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP31]], ptr [[TMP37]], align 4
+// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP32]], ptr [[TMP38]], align 4
+// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes.5, ptr [[TMP39]], align 4
+// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP40]], align 4
+// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP41]], align 4
+// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP42]], align 4
+// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, ptr [[TMP43]], align 8
+// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 9
+// CHECK3-NEXT: store i64 0, ptr [[TMP44]], align 8
+// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 10
+// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP45]], align 4
+// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 11
+// CHECK3-NEXT: store [3 x i32] [[TMP34]], ptr [[TMP46]], align 4
+// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 12
+// CHECK3-NEXT: store i32 0, ptr [[TMP47]], align 4
+// CHECK3-NEXT: [[TMP48:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP33]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.region_id, ptr [[KERNEL_ARGS6]])
+// CHECK3-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0
+// CHECK3-NEXT: br i1 [[TMP49]], label [[OMP_OFFLOAD_FAILED7:%.*]], label [[OMP_OFFLOAD_CONT8:%.*]]
+// CHECK3: omp_offload.failed7:
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73(i32 [[TMP27]]) #[[ATTR2]]
+// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT8]]
+// CHECK3: omp_offload.cont8:
+// CHECK3-NEXT: [[TMP50:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP50]], 1
+// CHECK3-NEXT: ret i32 [[ADD9]]
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
+// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT: [[B:%.*]] = alloca i16, align 2
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i16, align 2
+// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 4
+// CHECK3-NEXT: [[KERNEL_ARGS1:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[A]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 3, ptr [[TMP0]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 0, ptr [[TMP1]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store ptr null, ptr [[TMP3]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store ptr null, ptr [[TMP4]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store ptr null, ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, ptr [[TMP8]], align 8
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK3-NEXT: store i64 0, ptr [[TMP9]], align 8
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK3-NEXT: store [3 x i32] [i32 20, i32 0, i32 0], ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK3-NEXT: store i32 0, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 20, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.region_id, ptr [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
+// CHECK3-NEXT: br i1 [[TMP14]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3: omp_offload.failed:
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53() #[[ATTR2]]
+// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK3: omp_offload.cont:
+// CHECK3-NEXT: store i16 1, ptr [[B]], align 2
+// CHECK3-NEXT: [[TMP15:%.*]] = load i16, ptr [[B]], align 2
+// CHECK3-NEXT: store i16 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 2
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4
+// CHECK3-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[A_CASTED]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i16, ptr [[B]], align 2
+// CHECK3-NEXT: store i16 [[TMP18]], ptr [[B_CASTED]], align 2
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[B_CASTED]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2
+// CHECK3-NEXT: store i16 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 2
+// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 [[TMP17]], ptr [[TMP22]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 [[TMP17]], ptr [[TMP23]], align 4
+// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store ptr null, ptr [[TMP24]], align 4
+// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP25]], align 4
+// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP26]], align 4
+// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT: store ptr null, ptr [[TMP27]], align 4
+// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK3-NEXT: store i32 [[TMP21]], ptr [[TMP28]], align 4
+// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK3-NEXT: store i32 [[TMP21]], ptr [[TMP29]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK3-NEXT: store ptr null, ptr [[TMP30]], align 4
+// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP33:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2
+// CHECK3-NEXT: [[TMP34:%.*]] = zext i16 [[TMP33]] to i32
+// CHECK3-NEXT: [[TMP35:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP34]], 0
+// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 0
+// CHECK3-NEXT: store i32 3, ptr [[TMP36]], align 4
+// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 1
+// CHECK3-NEXT: store i32 3, ptr [[TMP37]], align 4
+// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 2
+// CHECK3-NEXT: store ptr [[TMP31]], ptr [[TMP38]], align 4
+// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 3
+// CHECK3-NEXT: store ptr [[TMP32]], ptr [[TMP39]], align 4
+// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 4
+// CHECK3-NEXT: store ptr @.offload_sizes.7, ptr [[TMP40]], align 4
+// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 5
+// CHECK3-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP41]], align 4
+// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 6
+// CHECK3-NEXT: store ptr null, ptr [[TMP42]], align 4
+// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 7
+// CHECK3-NEXT: store ptr null, ptr [[TMP43]], align 4
+// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, ptr [[TMP44]], align 8
+// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 9
+// CHECK3-NEXT: store i64 0, ptr [[TMP45]], align 8
+// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 10
+// CHECK3-NEXT: store [3 x i32] [i32 1, i32 0, i32 0], ptr [[TMP46]], align 4
+// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 11
+// CHECK3-NEXT: store [3 x i32] [[TMP35]], ptr [[TMP47]], align 4
+// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS1]], i32 0, i32 12
+// CHECK3-NEXT: store i32 0, ptr [[TMP48]], align 4
+// CHECK3-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 1, i32 [[TMP34]], ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.region_id, ptr [[KERNEL_ARGS1]])
+// CHECK3-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
+// CHECK3-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED2:%.*]], label [[OMP_OFFLOAD_CONT3:%.*]]
+// CHECK3: omp_offload.failed2:
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58(i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP21]]) #[[ATTR2]]
+// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT3]]
+// CHECK3: omp_offload.cont3:
+// CHECK3-NEXT: [[TMP51:%.*]] = load i32, ptr [[A]], align 4
+// CHECK3-NEXT: ret i32 [[TMP51]]
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86
+// CHECK3-SAME: (ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null)
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined, ptr [[TMP1]], i32 [[TMP4]])
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
+// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
+// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91
+// CHECK3-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR1]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 2, ptr null)
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined, ptr [[TMP1]])
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR1]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK3-NEXT: store double 2.500000e+00, ptr [[A]], align 4
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69
+// CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null)
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined)
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73
+// CHECK3-SAME: (i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null)
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined)
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53
+// CHECK3-SAME: () #[[ATTR1]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 2, ptr null)
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined)
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58
+// CHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2
+// CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32
+// CHECK3-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null)
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2
+// CHECK3-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4
+// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined, i32 [[TMP4]], i32 [[TMP6]])
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined
+// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2
+// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]]
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69
+// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK9-NEXT: entry:
+// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null)
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined)
+// CHECK9-NEXT: ret void
+//
+//
+// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+// CHECK9-NEXT: entry:
+// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK9-NEXT: ret void
+//
+//
+// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73
+// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
+// CHECK9-NEXT: entry:
+// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null)
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined)
+// CHECK9-NEXT: ret void
+//
+//
+// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+// CHECK9-NEXT: entry:
+// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK9-NEXT: ret void
+//
+//
+// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86
+// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
+// CHECK9-NEXT: entry:
+// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
+// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null)
+// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK9-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4
+// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[B_CASTED]], align 8
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined, ptr [[TMP1]], i64 [[TMP4]])
+// CHECK9-NEXT: ret void
+//
+//
+// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK9-NEXT: entry:
+// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
+// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK9-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
+// CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
+// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK9-NEXT: store double [[ADD]], ptr [[A]], align 8
+// CHECK9-NEXT: ret void
+//
+//
+// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91
+// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] {
+// CHECK9-NEXT: entry:
+// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 2, ptr null)
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined, ptr [[TMP1]])
+// CHECK9-NEXT: ret void
+//
+//
+// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] {
+// CHECK9-NEXT: entry:
+// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK9-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK9-NEXT: store double 2.500000e+00, ptr [[A]], align 8
+// CHECK9-NEXT: ret void
+//
+//
+// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53
+// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
+// CHECK9-NEXT: entry:
+// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 2, ptr null)
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined)
+// CHECK9-NEXT: ret void
+//
+//
+// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+// CHECK9-NEXT: entry:
+// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK9-NEXT: ret void
+//
+//
+// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58
+// CHECK9-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
+// CHECK9-NEXT: entry:
+// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
+// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
+// CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// CHECK9-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2
+// CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32
+// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null)
+// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
+// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8
+// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2
+// CHECK9-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2
+// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8
+// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined, i64 [[TMP4]], i64 [[TMP6]])
+// CHECK9-NEXT: ret void
+//
+//
+// CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined
+// CHECK9-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK9-NEXT: entry:
+// CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
+// CHECK9-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
+// CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK9-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
+// CHECK9-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
+// CHECK9-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2
+// CHECK9-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
+// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]]
+// CHECK9-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
+// CHECK9-NEXT: ret void
+//
+//
+// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69
+// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK11-NEXT: entry:
+// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null)
+// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined)
+// CHECK11-NEXT: ret void
+//
+//
+// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l69.omp_outlined
+// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+// CHECK11-NEXT: entry:
+// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK11-NEXT: ret void
+//
+//
+// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73
+// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
+// CHECK11-NEXT: entry:
+// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 2, ptr null)
+// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined)
+// CHECK11-NEXT: ret void
+//
+//
+// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l73.omp_outlined
+// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+// CHECK11-NEXT: entry:
+// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK11-NEXT: ret void
+//
+//
+// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86
+// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
+// CHECK11-NEXT: entry:
+// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null)
+// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK11-NEXT: store i32 [[TMP3]], ptr [[B_CASTED]], align 4
+// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_CASTED]], align 4
+// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined, ptr [[TMP1]], i32 [[TMP4]])
+// CHECK11-NEXT: ret void
+//
+//
+// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l86.omp_outlined
+// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK11-NEXT: entry:
+// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
+// CHECK11-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
+// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK11-NEXT: store double [[ADD]], ptr [[A]], align 4
+// CHECK11-NEXT: ret void
+//
+//
+// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91
+// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] {
+// CHECK11-NEXT: entry:
+// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 2, ptr null)
+// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined, ptr [[TMP1]])
+// CHECK11-NEXT: ret void
+//
+//
+// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l91.omp_outlined
+// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[THIS:%.*]]) #[[ATTR0]] {
+// CHECK11-NEXT: entry:
+// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CHECK11-NEXT: store double 2.500000e+00, ptr [[A]], align 4
+// CHECK11-NEXT: ret void
+//
+//
+// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53
+// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
+// CHECK11-NEXT: entry:
+// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 2, ptr null)
+// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined)
+// CHECK11-NEXT: ret void
+//
+//
+// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53.omp_outlined
+// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+// CHECK11-NEXT: entry:
+// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK11-NEXT: ret void
+//
+//
+// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58
+// CHECK11-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0]] {
+// CHECK11-NEXT: entry:
+// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK11-NEXT: [[TMP1:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR__ADDR]], align 2
+// CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32
+// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 2, ptr null)
+// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
+// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4
+// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[B_ADDR]], align 2
+// CHECK11-NEXT: store i16 [[TMP5]], ptr [[B_CASTED]], align 2
+// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4
+// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined, i32 [[TMP4]], i32 [[TMP6]])
+// CHECK11-NEXT: ret void
+//
+//
+// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58.omp_outlined
+// CHECK11-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK11-NEXT: entry:
+// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
+// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK11-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK11-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK11-NEXT: [[TMP0:%.*]] = load i16, ptr [[B_ADDR]], align 2
+// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
+// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV]]
+// CHECK11-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
+// CHECK11-NEXT: ret void
+//
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index f974cfc78c8dd..119e2dcbc2b77 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -217,6 +217,8 @@ __OMP_RTL(__kmpc_omp_taskwait, false, Int32, IdentPtr, Int32)
__OMP_RTL(__kmpc_omp_taskyield, false, Int32, IdentPtr, Int32, /* Int */ Int32)
__OMP_RTL(__kmpc_push_num_threads, false, Void, IdentPtr, Int32,
/* Int */ Int32)
+__OMP_RTL(__kmpc_push_num_threads_strict, false, Void, IdentPtr, Int32,
+ /* Int */ Int32, /* Int */ Int32, /* const char* */ Int8Ptr)
__OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32)
__OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32,
/* kmp_task_t */ VoidPtr, Int32,
@@ -470,6 +472,8 @@ __OMP_RTL(__kmpc_target_deinit, false, Void,)
__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr)
__OMP_RTL(__kmpc_parallel_51, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
VoidPtr, VoidPtr, VoidPtrPtr, SizeTy)
+__OMP_RTL(__kmpc_parallel_60, false, Void, IdentPtr, Int32, Int32, Int32, Int32,
+ VoidPtr, VoidPtr, VoidPtrPtr, SizeTy, Int32, Int32, Int8Ptr)
__OMP_RTL(__kmpc_for_static_loop_4, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32)
__OMP_RTL(__kmpc_for_static_loop_4u, false, Void, IdentPtr, VoidPtr, VoidPtr, Int32, Int32, Int32)
__OMP_RTL(__kmpc_for_static_loop_8, false, Void, IdentPtr, VoidPtr, VoidPtr, Int64, Int64, Int64)
@@ -708,6 +712,10 @@ __OMP_RTL_ATTRS(__kmpc_omp_taskyield, InaccessibleArgOnlyAttrs, SExt,
ParamAttrs(ReadOnlyPtrAttrs, SExt, SExt))
__OMP_RTL_ATTRS(__kmpc_push_num_threads, InaccessibleArgOnlyAttrs,
AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs, SExt, SExt))
+__OMP_RTL_ATTRS(__kmpc_push_num_threads_strict, InaccessibleArgOnlyAttrs,
+ AttributeSet(),
+ ParamAttrs(ReadOnlyPtrAttrs, SExt, SExt, SExt,
+ ReadOnlyPtrAttrs))
__OMP_RTL_ATTRS(__kmpc_push_proc_bind, InaccessibleArgOnlyAttrs, AttributeSet(),
ParamAttrs(ReadOnlyPtrAttrs, SExt, SExt))
__OMP_RTL_ATTRS(__kmpc_omp_reg_task_with_affinity, DefaultAttrs, SExt,
@@ -1079,6 +1087,10 @@ __OMP_RTL_ATTRS(__kmpc_parallel_51, AlwaysInlineAttrs, AttributeSet(),
ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt,
AttributeSet(), AttributeSet(), AttributeSet(),
SizeTyExt))
+__OMP_RTL_ATTRS(__kmpc_parallel_60, AlwaysInlineAttrs, AttributeSet(),
+ ParamAttrs(AttributeSet(), SExt, SExt, SExt, SExt,
+ AttributeSet(), AttributeSet(), AttributeSet(),
+ SizeTyExt, SExt, SExt, AttributeSet()))
__OMP_RTL_ATTRS(__kmpc_serialized_parallel, InaccessibleArgOnlyAttrs,
AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs, SExt))
__OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, InaccessibleArgOnlyAttrs,
>From 8296fde47f8e8fccfe8127a6d6b5dff773cbcedd Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Tue, 1 Jul 2025 08:08:26 -0500
Subject: [PATCH 2/6] GPU codegen doesn't use emitNumThreadsClause -> pass
information to emitParallelCall as well
---
clang/lib/CodeGen/CGStmtOpenMP.cpp | 12 +-
...cn_target_parallel_num_threads_codegen.cpp | 1067 +++++++++++++++++
...tx_target_parallel_num_threads_codegen.cpp | 8 +-
3 files changed, 1079 insertions(+), 8 deletions(-)
create mode 100644 clang/test/OpenMP/amdgcn_target_parallel_num_threads_codegen.cpp
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index ad7b941f92dd6..c0c71cfba0516 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1608,6 +1608,11 @@ static void emitCommonOMPParallelDirective(
const CodeGenBoundParametersTy &CodeGenBoundParameters) {
const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
llvm::Value *NumThreads = nullptr;
+ OpenMPNumThreadsClauseModifier Modifier = OMPC_NUMTHREADS_unknown;
+ // OpenMP 6.0, 10.4: "If no severity clause is specified then the effect is as
+ // if sev-level is fatal."
+ OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal;
+ clang::Expr *Message = nullptr;
llvm::Function *OutlinedFn =
CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
@@ -1616,9 +1621,7 @@ static void emitCommonOMPParallelDirective(
CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
/*IgnoreResultAssign=*/true);
- OpenMPNumThreadsClauseModifier Modifier = NumThreadsClause->getModifier();
- OpenMPSeverityClauseKind Severity = OMPC_SEVERITY_fatal;
- clang::Expr *Message = nullptr;
+ Modifier = NumThreadsClause->getModifier();
if (const auto *MessageClause = S.getSingleClause<OMPMessageClause>())
Message = MessageClause->getMessageString();
if (const auto *SeverityClause = S.getSingleClause<OMPSeverityClause>())
@@ -1650,7 +1653,8 @@ static void emitCommonOMPParallelDirective(
CodeGenBoundParameters(CGF, S, CapturedVars);
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
- CapturedVars, IfCond, NumThreads);
+ CapturedVars, IfCond, NumThreads,
+ Modifier, Severity, Message);
}
static bool isAllocatableDecl(const VarDecl *VD) {
diff --git a/clang/test/OpenMP/amdgcn_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/amdgcn_target_parallel_num_threads_codegen.cpp
new file mode 100644
index 0000000000000..4f79ef2617909
--- /dev/null
+++ b/clang/test/OpenMP/amdgcn_target_parallel_num_threads_codegen.cpp
@@ -0,0 +1,1067 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_1
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_2
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=OMP45_2
+
+// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_1
+// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_2
+// RUN: %clang_cc1 -DOMP60 -verify -fopenmp -fopenmp-version=60 -fexceptions -fcxx-exceptions -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefixes=OMP60_2
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2
+// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK2
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template<typename tx> // Fixture: target parallel regions with plain and strict num_threads. Keep line positions stable; the region names asserted below appear to embed them.
+tx ftemplate(int n) {
+ tx a = 0;
+ short aa = 0;
+ tx b[10];
+
+ #pragma omp target parallel map(tofrom: aa) num_threads(1024) // constant thread count; asserted below as region ..._l31 calling __kmpc_parallel_51
+ {
+ aa += 1;
+ }
+ #ifdef OMP60 // set only by the RUN lines that pass -DOMP60 together with -fopenmp-version=60
+ #pragma omp target parallel map(tofrom: aa) num_threads(strict: 1024) // strict modifier; asserted below as region ..._l36 calling __kmpc_parallel_60
+ {
+ aa += 1;
+ }
+ #endif // OMP60
+
+ #pragma omp target parallel map(tofrom:a, aa, b) if(target: n>40) num_threads(n) // runtime thread count plus target if-clause; asserted below as region ..._l42
+ {
+ a += 1;
+ aa += 1;
+ b[2] += 1;
+ }
+ #ifdef OMP60 // same guard as above
+ #pragma omp target parallel map(tofrom:a, aa, b) if(target: n>40) num_threads(strict: n) // strict modifier with a runtime thread count
+ {
+ a += 1;
+ aa += 1;
+ b[2] += 1;
+ }
+ #endif // OMP60
+
+ return a;
+}
+
+int bar(int n){ // Calls ftemplate<int>(n), the instantiation named ...ftemplateIiET_i... in the assertions below, and returns its result.
+ int a = 0;
+
+ a += ftemplate<int>(n);
+
+ return a;
+}
+
+#endif
+// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
+// OMP45_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
+// OMP45_1-NEXT: entry:
+// OMP45_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// OMP45_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// OMP45_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// OMP45_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
+// OMP45_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]])
+// OMP45_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// OMP45_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP45_1: user_code.entry:
+// OMP45_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// OMP45_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// OMP45_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
+// OMP45_1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
+// OMP45_1-NEXT: call void @__kmpc_target_deinit()
+// OMP45_1-NEXT: ret void
+// OMP45_1: worker.exit:
+// OMP45_1-NEXT: ret void
+//
+//
+// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
+// OMP45_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
+// OMP45_1-NEXT: entry:
+// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// OMP45_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP45_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// OMP45_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
+// OMP45_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
+// OMP45_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// OMP45_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// OMP45_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// OMP45_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
+// OMP45_1-NEXT: ret void
+//
+//
+// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
+// OMP45_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
+// OMP45_1-NEXT: entry:
+// OMP45_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// OMP45_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// OMP45_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OMP45_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP45_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// OMP45_1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// OMP45_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// OMP45_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// OMP45_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP45_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// OMP45_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9:![0-9]+]]
+// OMP45_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
+// OMP45_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// OMP45_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment to ptr), ptr [[DYN_PTR]])
+// OMP45_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
+// OMP45_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP45_1: user_code.entry:
+// OMP45_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
+// OMP45_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// OMP45_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// OMP45_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
+// OMP45_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// OMP45_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
+// OMP45_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// OMP45_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
+// OMP45_1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
+// OMP45_1-NEXT: call void @__kmpc_target_deinit()
+// OMP45_1-NEXT: ret void
+// OMP45_1: worker.exit:
+// OMP45_1-NEXT: ret void
+//
+//
+// OMP45_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
+// OMP45_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// OMP45_1-NEXT: entry:
+// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// OMP45_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// OMP45_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OMP45_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP45_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// OMP45_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// OMP45_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// OMP45_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OMP45_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP45_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// OMP45_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// OMP45_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
+// OMP45_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// OMP45_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
+// OMP45_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// OMP45_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// OMP45_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2
+// OMP45_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32
+// OMP45_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// OMP45_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// OMP45_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2
+// OMP45_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2
+// OMP45_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// OMP45_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1
+// OMP45_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// OMP45_1-NEXT: ret void
+//
+//
+// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
+// OMP45_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
+// OMP45_2-NEXT: entry:
+// OMP45_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// OMP45_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// OMP45_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// OMP45_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
+// OMP45_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]])
+// OMP45_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// OMP45_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP45_2: user_code.entry:
+// OMP45_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// OMP45_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// OMP45_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
+// OMP45_2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
+// OMP45_2-NEXT: call void @__kmpc_target_deinit()
+// OMP45_2-NEXT: ret void
+// OMP45_2: worker.exit:
+// OMP45_2-NEXT: ret void
+//
+//
+// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
+// OMP45_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
+// OMP45_2-NEXT: entry:
+// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// OMP45_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP45_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// OMP45_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
+// OMP45_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
+// OMP45_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// OMP45_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// OMP45_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// OMP45_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
+// OMP45_2-NEXT: ret void
+//
+//
+// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
+// OMP45_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
+// OMP45_2-NEXT: entry:
+// OMP45_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// OMP45_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// OMP45_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OMP45_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP45_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// OMP45_2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// OMP45_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// OMP45_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// OMP45_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP45_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// OMP45_2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9:![0-9]+]]
+// OMP45_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
+// OMP45_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// OMP45_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment to ptr), ptr [[DYN_PTR]])
+// OMP45_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
+// OMP45_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP45_2: user_code.entry:
+// OMP45_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
+// OMP45_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// OMP45_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// OMP45_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
+// OMP45_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// OMP45_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
+// OMP45_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// OMP45_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
+// OMP45_2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
+// OMP45_2-NEXT: call void @__kmpc_target_deinit()
+// OMP45_2-NEXT: ret void
+// OMP45_2: worker.exit:
+// OMP45_2-NEXT: ret void
+//
+//
+// OMP45_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
+// OMP45_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// OMP45_2-NEXT: entry:
+// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP45_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// OMP45_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// OMP45_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OMP45_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP45_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// OMP45_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// OMP45_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// OMP45_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OMP45_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP45_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// OMP45_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// OMP45_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
+// OMP45_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// OMP45_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
+// OMP45_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// OMP45_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// OMP45_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2
+// OMP45_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32
+// OMP45_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// OMP45_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// OMP45_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2
+// OMP45_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2
+// OMP45_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// OMP45_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1
+// OMP45_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// OMP45_2-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
+// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// OMP60_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9:![0-9]+]], !align [[META10:![0-9]+]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]])
+// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_1: user_code.entry:
+// OMP60_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// OMP60_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
+// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
+// OMP60_1-NEXT: call void @__kmpc_target_deinit()
+// OMP60_1-NEXT: ret void
+// OMP60_1: worker.exit:
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
+// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
+// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// OMP60_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
+// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// OMP60_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment to ptr), ptr [[DYN_PTR]])
+// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_1: user_code.entry:
+// OMP60_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
+// OMP60_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
+// OMP60_1-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1, i32 1, i32 2, ptr null)
+// OMP60_1-NEXT: call void @__kmpc_target_deinit()
+// OMP60_1-NEXT: ret void
+// OMP60_1: worker.exit:
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
+// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
+// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// OMP60_1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
+// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// OMP60_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// OMP60_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11:![0-9]+]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// OMP60_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment to ptr), ptr [[DYN_PTR]])
+// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
+// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_1: user_code.entry:
+// OMP60_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
+// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// OMP60_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
+// OMP60_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
+// OMP60_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// OMP60_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
+// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
+// OMP60_1-NEXT: call void @__kmpc_target_deinit()
+// OMP60_1-NEXT: ret void
+// OMP60_1: worker.exit:
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
+// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// OMP60_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// OMP60_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
+// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// OMP60_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// OMP60_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2
+// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32
+// OMP60_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// OMP60_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2
+// OMP60_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2
+// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// OMP60_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1
+// OMP60_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49
+// OMP60_1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// OMP60_1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// OMP60_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// OMP60_1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// OMP60_1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// OMP60_1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// OMP60_1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_kernel_environment to ptr), ptr [[DYN_PTR]])
+// OMP60_1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
+// OMP60_1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_1: user_code.entry:
+// OMP60_1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
+// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// OMP60_1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
+// OMP60_1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
+// OMP60_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// OMP60_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
+// OMP60_1-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3, i32 1, i32 2, ptr null)
+// OMP60_1-NEXT: call void @__kmpc_target_deinit()
+// OMP60_1-NEXT: ret void
+// OMP60_1: worker.exit:
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined
+// OMP60_1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// OMP60_1-NEXT: entry:
+// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// OMP60_1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// OMP60_1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OMP60_1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// OMP60_1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// OMP60_1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// OMP60_1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// OMP60_1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// OMP60_1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
+// OMP60_1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// OMP60_1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// OMP60_1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2
+// OMP60_1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32
+// OMP60_1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// OMP60_1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2
+// OMP60_1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2
+// OMP60_1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// OMP60_1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1
+// OMP60_1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// OMP60_1-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
+// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// OMP60_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9:![0-9]+]], !align [[META10:![0-9]+]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]])
+// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_2: user_code.entry:
+// OMP60_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// OMP60_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
+// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
+// OMP60_2-NEXT: call void @__kmpc_target_deinit()
+// OMP60_2-NEXT: ret void
+// OMP60_2: worker.exit:
+// OMP60_2-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
+// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
+// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// OMP60_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
+// OMP60_2-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
+// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// OMP60_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment to ptr), ptr [[DYN_PTR]])
+// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_2: user_code.entry:
+// OMP60_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
+// OMP60_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
+// OMP60_2-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1, i32 1, i32 2, ptr null)
+// OMP60_2-NEXT: call void @__kmpc_target_deinit()
+// OMP60_2-NEXT: ret void
+// OMP60_2: worker.exit:
+// OMP60_2-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
+// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
+// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// OMP60_2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
+// OMP60_2-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
+// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// OMP60_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// OMP60_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11:![0-9]+]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// OMP60_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment to ptr), ptr [[DYN_PTR]])
+// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
+// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_2: user_code.entry:
+// OMP60_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
+// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// OMP60_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
+// OMP60_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
+// OMP60_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// OMP60_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
+// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
+// OMP60_2-NEXT: call void @__kmpc_target_deinit()
+// OMP60_2-NEXT: ret void
+// OMP60_2: worker.exit:
+// OMP60_2-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
+// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// OMP60_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// OMP60_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
+// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// OMP60_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// OMP60_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2
+// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32
+// OMP60_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// OMP60_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2
+// OMP60_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2
+// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// OMP60_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1
+// OMP60_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// OMP60_2-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49
+// OMP60_2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// OMP60_2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// OMP60_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// OMP60_2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// OMP60_2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// OMP60_2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// OMP60_2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_kernel_environment to ptr), ptr [[DYN_PTR]])
+// OMP60_2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
+// OMP60_2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// OMP60_2: user_code.entry:
+// OMP60_2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
+// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// OMP60_2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
+// OMP60_2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
+// OMP60_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// OMP60_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
+// OMP60_2-NEXT: call void @__kmpc_parallel_60(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3, i32 1, i32 2, ptr null)
+// OMP60_2-NEXT: call void @__kmpc_target_deinit()
+// OMP60_2-NEXT: ret void
+// OMP60_2: worker.exit:
+// OMP60_2-NEXT: ret void
+//
+//
+// OMP60_2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined
+// OMP60_2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// OMP60_2-NEXT: entry:
+// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// OMP60_2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// OMP60_2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// OMP60_2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// OMP60_2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// OMP60_2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// OMP60_2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// OMP60_2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// OMP60_2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// OMP60_2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// OMP60_2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
+// OMP60_2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// OMP60_2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// OMP60_2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2
+// OMP60_2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32
+// OMP60_2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// OMP60_2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// OMP60_2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2
+// OMP60_2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2
+// OMP60_2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// OMP60_2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1
+// OMP60_2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// OMP60_2-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]])
+// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// CHECK1: user_code.entry:
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
+// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: ret void
+// CHECK1: worker.exit:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
+// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// CHECK1-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment to ptr), ptr [[DYN_PTR]])
+// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
+// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// CHECK1: user_code.entry:
+// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
+// CHECK1-NEXT: call void @__kmpc_target_deinit()
+// CHECK1-NEXT: ret void
+// CHECK1: worker.exit:
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2
+// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32
+// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// CHECK1-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
+// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK2-NEXT: entry:
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
+// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment to ptr), ptr [[DYN_PTR]])
+// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// CHECK2: user_code.entry:
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
+// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
+// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: ret void
+// CHECK2: worker.exit:
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
+// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK2-NEXT: entry:
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
+// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
+// CHECK2-NEXT: store i16 [[CONV1]], ptr [[TMP0]], align 2
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
+// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
+// CHECK2-NEXT: entry:
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9:![0-9]+]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment to ptr), ptr [[DYN_PTR]])
+// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
+// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// CHECK2: user_code.entry:
+// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr))
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
+// CHECK2-NEXT: call void @__kmpc_target_deinit()
+// CHECK2-NEXT: ret void
+// CHECK2: worker.exit:
+// CHECK2-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
+// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
+// CHECK2-NEXT: entry:
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP1]], align 2
+// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP4]] to i32
+// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
+// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
+// CHECK2-NEXT: store i16 [[CONV2]], ptr [[TMP1]], align 2
+// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP2]], i64 0, i64 2
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4
+// CHECK2-NEXT: ret void
+//
diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
index 7de49fed0ca65..1c96ffdec88e1 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
@@ -339,7 +339,7 @@ int bar(int n){
// OMP60_1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// OMP60_1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
// OMP60_1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
-// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// OMP60_1-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1, i32 1, i32 2, ptr null)
// OMP60_1-NEXT: call void @__kmpc_target_deinit()
// OMP60_1-NEXT: ret void
// OMP60_1: worker.exit:
@@ -460,7 +460,7 @@ int bar(int n){
// OMP60_1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
// OMP60_1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
// OMP60_1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
-// OMP60_1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// OMP60_1-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3, i32 1, i32 2, ptr null)
// OMP60_1-NEXT: call void @__kmpc_target_deinit()
// OMP60_1-NEXT: ret void
// OMP60_1: worker.exit:
@@ -555,7 +555,7 @@ int bar(int n){
// OMP60_2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// OMP60_2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
// OMP60_2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
-// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// OMP60_2-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1, i32 1, i32 2, ptr null)
// OMP60_2-NEXT: call void @__kmpc_target_deinit()
// OMP60_2-NEXT: ret void
// OMP60_2: worker.exit:
@@ -676,7 +676,7 @@ int bar(int n){
// OMP60_2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4
// OMP60_2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
// OMP60_2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4
-// OMP60_2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// OMP60_2-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3, i32 1, i32 2, ptr null)
// OMP60_2-NEXT: call void @__kmpc_target_deinit()
// OMP60_2-NEXT: ret void
// OMP60_2: worker.exit:
>From c79a1f169f14ec186928516e63b5302e47218ecc Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Wed, 23 Jul 2025 02:41:09 -0500
Subject: [PATCH 3/6] implement feedback
---
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index aadae117a5b85..d5bd37a91358b 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2718,11 +2718,14 @@ void CGOpenMPRuntime::emitNumThreadsClause(
// as if sev-level is fatal."
Args.push_back(llvm::ConstantInt::get(
CGM.Int32Ty, Severity == OMPC_SEVERITY_warning ? 1 : 2));
- if (Message)
- Args.push_back(CGF.EmitStringLiteralLValue(cast<StringLiteral>(Message))
- .getPointer(CGF));
- else
+ if (Message) {
+ if (const StringLiteral *Msg = dyn_cast<StringLiteral>(Message))
+ Args.push_back(CGF.EmitStringLiteralLValue(Msg).getPointer(CGF));
+ else
+ Args.push_back(CGF.EmitScalarExpr(Message));
+ } else {
Args.push_back(llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
+ }
}
CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
>From d039caae9e3a7836c81853e8ed9187e393c6d6f2 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Thu, 31 Jul 2025 06:34:21 -0500
Subject: [PATCH 4/6] support non-string-literals for the message clause
---
clang/include/clang/AST/OpenMPClause.h | 16 +-
.../clang/Basic/DiagnosticParseKinds.td | 4 +-
clang/lib/AST/OpenMPClause.cpp | 8 +-
clang/lib/CodeGen/CGOpenMPRuntime.cpp | 15 +-
clang/lib/Sema/SemaOpenMP.cpp | 29 +-
clang/lib/Sema/TreeTransform.h | 3 +-
clang/test/OpenMP/error_codegen.cpp | 734 +++++++++++++++++-
clang/test/OpenMP/error_message.cpp | 6 +-
.../test/OpenMP/parallel_message_messages.cpp | 28 +-
.../OpenMP/parallel_num_threads_codegen.cpp | 29 +-
10 files changed, 802 insertions(+), 70 deletions(-)
diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index 1118d3e062e68..b5dabf283964e 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -1874,6 +1874,10 @@ class OMPMessageClause final : public OMPClause {
// Expression of the 'message' clause.
Stmt *MessageString = nullptr;
+ // The message as a StringLiteral in case it is a string literal. This might
+ // be needed at compile time.
+ StringLiteral *MessageStringLiteral = nullptr;
+
/// Set message string of the clause.
void setMessageString(Expr *MS) { MessageString = MS; }
@@ -1887,10 +1891,14 @@ class OMPMessageClause final : public OMPClause {
/// \param StartLoc Starting location of the clause.
/// \param LParenLoc Location of '('.
/// \param EndLoc Ending location of the clause.
+ /// \param MessageStringLiteral The message as a StringLiteral in case it is
+ /// a string literal. This might be needed at compile time.
OMPMessageClause(Expr *MS, SourceLocation StartLoc, SourceLocation LParenLoc,
- SourceLocation EndLoc)
+ SourceLocation EndLoc,
+ StringLiteral *MessageStringLiteral = nullptr)
: OMPClause(llvm::omp::OMPC_message, StartLoc, EndLoc),
- LParenLoc(LParenLoc), MessageString(MS) {}
+ LParenLoc(LParenLoc), MessageString(MS),
+ MessageStringLiteral(MessageStringLiteral) {}
/// Build an empty clause.
OMPMessageClause()
@@ -1903,6 +1911,10 @@ class OMPMessageClause final : public OMPClause {
/// Returns message string of the clause.
Expr *getMessageString() const { return cast_or_null<Expr>(MessageString); }
+ // Returns the source message as a string literal if it was written as a
+ // string literal. Otherwise, returns null.
+ StringLiteral *getAsStringLiteral() const { return MessageStringLiteral; }
+
child_range children() {
return child_range(&MessageString, &MessageString + 1);
}
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 35903af998fbe..f52c6dca35e3b 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1499,8 +1499,10 @@ def err_omp_unexpected_directive : Error<
"unexpected OpenMP directive %select{|'#pragma omp %1'}0">;
def err_omp_expected_punc : Error<
"expected ',' or ')' in '%0' %select{clause|directive}1">;
-def warn_clause_expected_string : Warning<
+def warn_clause_expected_string_literal : Warning<
"expected string literal in 'clause %0' - ignoring">, InGroup<IgnoredPragmas>;
+def warn_clause_expected_string: Warning<
+ "expected string in 'clause %0' - ignoring">, InGroup<IgnoredPragmas>;
def err_omp_unexpected_clause : Error<
"unexpected OpenMP clause '%0' in directive '#pragma omp %1'">;
def err_omp_unexpected_clause_extension_only : Error<
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index de8b5996818de..b561156cd2d20 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -1963,8 +1963,12 @@ void OMPClausePrinter::VisitOMPSeverityClause(OMPSeverityClause *Node) {
}
void OMPClausePrinter::VisitOMPMessageClause(OMPMessageClause *Node) {
- OS << "message(\""
- << cast<StringLiteral>(Node->getMessageString())->getString() << "\")";
+ OS << "message(";
+ if (StringLiteral *SL = Node->getAsStringLiteral())
+ OS << "\"" << SL->getString() << "\"";
+ else if (Expr *E = Node->getMessageString())
+ E->printPretty(OS, nullptr, Policy);
+ OS << ")";
}
void OMPClausePrinter::VisitOMPScheduleClause(OMPScheduleClause *Node) {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index d5bd37a91358b..848a1ea6a4b84 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2372,9 +2372,8 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
Expr *ME, bool IsFatal) {
- llvm::Value *MVL =
- ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
- : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+ llvm::Value *MVL = ME ? CGF.EmitScalarExpr(ME)
+ : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
// Build call void __kmpc_error(ident_t *loc, int severity, const char
// *message)
llvm::Value *Args[] = {
@@ -2718,14 +2717,10 @@ void CGOpenMPRuntime::emitNumThreadsClause(
// as if sev-level is fatal."
Args.push_back(llvm::ConstantInt::get(
CGM.Int32Ty, Severity == OMPC_SEVERITY_warning ? 1 : 2));
- if (Message) {
- if (const StringLiteral *Msg = dyn_cast<StringLiteral>(Message))
- Args.push_back(CGF.EmitStringLiteralLValue(Msg).getPointer(CGF));
- else
- Args.push_back(CGF.EmitScalarExpr(Message));
- } else {
+ if (Message)
+ Args.push_back(CGF.EmitScalarExpr(Message));
+ else
Args.push_back(llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
- }
}
CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args);
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 4ecc9b0d4c5c8..70e43379b64d2 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -11082,15 +11082,19 @@ StmtResult SemaOpenMP::ActOnOpenMPErrorDirective(ArrayRef<OMPClause *> Clauses,
OMPExecutableDirective::getSingleClause<OMPSeverityClause>(Clauses);
const OMPMessageClause *MessageC =
OMPExecutableDirective::getSingleClause<OMPMessageClause>(Clauses);
- Expr *ME = MessageC ? MessageC->getMessageString() : nullptr;
if (!AtC || AtC->getAtKind() == OMPC_AT_compilation) {
+ StringLiteral *SL = MessageC ? MessageC->getAsStringLiteral() : nullptr;
+ if (MessageC && !SL)
+ Diag(MessageC->getMessageString()->getBeginLoc(),
+ diag::warn_clause_expected_string_literal)
+ << getOpenMPClauseNameForDiag(OMPC_message);
if (SeverityC && SeverityC->getSeverityKind() == OMPC_SEVERITY_warning)
Diag(SeverityC->getSeverityKindKwLoc(), diag::warn_diagnose_if_succeeded)
- << (ME ? cast<StringLiteral>(ME)->getString() : "WARNING");
+ << (SL ? SL->getString() : "WARNING");
else
Diag(StartLoc, diag::err_diagnose_if_succeeded)
- << (ME ? cast<StringLiteral>(ME)->getString() : "ERROR");
+ << (SL ? SL->getString() : "ERROR");
if (!SeverityC || SeverityC->getSeverityKind() != OMPC_SEVERITY_warning)
return StmtError();
}
@@ -16445,13 +16449,28 @@ OMPClause *SemaOpenMP::ActOnOpenMPMessageClause(Expr *ME,
SourceLocation LParenLoc,
SourceLocation EndLoc) {
assert(ME && "NULL expr in Message clause");
- if (!isa<StringLiteral>(ME)) {
+ QualType Type = ME->getType();
+ if ((!Type->isPointerType() && !Type->isArrayType()) ||
+ !Type->getPointeeOrArrayElementType()->isAnyCharacterType()) {
Diag(ME->getBeginLoc(), diag::warn_clause_expected_string)
<< getOpenMPClauseNameForDiag(OMPC_message);
return nullptr;
}
+
+ // If the string is a string literal, save it in case it is later needed
+ // during compile time. Also consider a const char pointer to a string
+ // literal. This is necessary for template instantiations where the string
+ // literal is not passed directly and we only get a const char pointer to it.
+ StringLiteral *SL = dyn_cast<StringLiteral>(
+ isa<ImplicitCastExpr>(ME) ? cast<ImplicitCastExpr>(ME)->getSubExpr()
+ : ME);
+
+ // Convert array type to pointer type if needed.
+ if (Type->isArrayType())
+ ME = SemaRef.DefaultFunctionArrayConversion(ME).get();
+
return new (getASTContext())
- OMPMessageClause(ME, StartLoc, LParenLoc, EndLoc);
+ OMPMessageClause(ME, StartLoc, LParenLoc, EndLoc, SL);
}
OMPClause *SemaOpenMP::ActOnOpenMPOrderClause(
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 3e38f8b183dfd..9489357919715 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -10948,8 +10948,7 @@ TreeTransform<Derived>::TransformOMPMessageClause(OMPMessageClause *C) {
if (E.isInvalid())
return nullptr;
return getDerived().RebuildOMPMessageClause(
- C->getMessageString(), C->getBeginLoc(), C->getLParenLoc(),
- C->getEndLoc());
+ E.get(), C->getBeginLoc(), C->getLParenLoc(), C->getEndLoc());
}
template <typename Derived>
diff --git a/clang/test/OpenMP/error_codegen.cpp b/clang/test/OpenMP/error_codegen.cpp
index 70d493e01a709..0efb0ab098cf3 100644
--- a/clang/test/OpenMP/error_codegen.cpp
+++ b/clang/test/OpenMP/error_codegen.cpp
@@ -1,8 +1,9 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 5
// RUN: %clang_cc1 -std=c++11 -fopenmp -fopenmp-version=51 -triple x86_64 \
-// RUN: -emit-llvm -o - %s | FileCheck %s
+// RUN: -emit-llvm -o - %s | FileCheck --check-prefix OMP51 %s
// RUN: %clang_cc1 -std=c++11 -fopenmp -fopenmp-version=60 -triple x86_64 \
-// RUN: -emit-llvm -o - %s | FileCheck %s
+// RUN: -emit-llvm -o - %s | FileCheck --check-prefix OMP60 %s
// RUN: %clang_cc1 -std=c++11 -fopenmp-simd -fopenmp-version=51 \
// RUN: -debug-info-kind=limited -triple x86_64 -emit-llvm -o - %s | \
@@ -12,20 +13,6 @@
// RUN: -debug-info-kind=limited -triple x86_64 -emit-llvm -o - %s | \
// RUN: FileCheck --check-prefix SIMD %s
-//CHECK: @.str = private unnamed_addr constant [23 x i8] c"GPU compiler required.\00", align 1
-//CHECK: @0 = private unnamed_addr constant {{.*}}error_codegen.cpp;main;59;1;;\00", align 1
-//CHECK: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{.*}}, ptr @0 }, align 8
-//CHECK: @.str.1 = private unnamed_addr constant [27 x i8] c"Note this is functioncall.\00", align 1
-//CHECK: @2 = private unnamed_addr constant {{.*}}error_codegen.cpp;main;61;1;;\00", align 1
-//CHECK: @3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{.*}}, ptr @2 }, align 8
-//CHECK: @.str.2 = private unnamed_addr constant [23 x i8] c"GNU compiler required.\00", align 1
-//CHECK: @4 = private unnamed_addr constant {{.*}}error_codegen.cpp;tmain;36;1;;\00", align 1
-//CHECK: @5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{.*}}, ptr @4 }, align 8
-//CHECK: @.str.3 = private unnamed_addr constant [22 x i8] c"Notice: add for loop.\00", align 1
-//CHECK: @6 = private unnamed_addr constant {{.*}}error_codegen.cpp;tmain;39;1;;\00", align 1
-//CHECK: @7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{.*}}, ptr @6 }, align 8
-//CHECK: @8 = private unnamed_addr constant {{.*}}error_codegen.cpp;tmain;45;1;;\00", align 1
-//CHECK: @9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 {{.*}}, ptr @8 }, align 8
void foo() {}
@@ -33,42 +20,723 @@ template <typename T, int N>
int tmain(T argc, char **argv) {
T b = argc, c, d, e, f, g;
static int a;
+ const char str1[] = "msg";
+ const char *str2 = "msg";
+ char str3[] = "msg";
+ char *str4 = str3;
+ char * const str5 = str3;
#pragma omp error at(execution) severity(fatal) message("GNU compiler required.")
+#pragma omp error at(execution) severity(fatal) message(str1)
+#pragma omp error at(execution) severity(fatal) message(str2)
+#pragma omp error at(execution) severity(fatal) message(str3)
+#pragma omp error at(execution) severity(fatal) message(str4)
+#pragma omp error at(execution) severity(fatal) message(str5)
a = argv[0][0];
++a;
#pragma omp error at(execution) severity(warning) message("Notice: add for loop.")
+#pragma omp error at(execution) severity(warning) message(str1)
+#pragma omp error at(execution) severity(warning) message(str2)
+#pragma omp error at(execution) severity(warning) message(str3)
+#pragma omp error at(execution) severity(warning) message(str4)
+#pragma omp error at(execution) severity(warning) message(str5)
{
int b = 10;
T c = 100;
a = b + c;
}
-#pragma omp error at(execution) severity(fatal) message("GPU compiler required.")
+#pragma omp error at(execution) severity(fatal) message("GPU compiler required.")
+#pragma omp error at(execution) severity(fatal) message(str1)
+#pragma omp error at(execution) severity(fatal) message(str2)
+#pragma omp error at(execution) severity(fatal) message(str3)
+#pragma omp error at(execution) severity(fatal) message(str4)
+#pragma omp error at(execution) severity(fatal) message(str5)
foo();
return N;
}
-// CHECK-LABEL: @main(
-// SIMD-LABEL: @main(
-// CHECK: call void @__kmpc_error(ptr @1, i32 2, ptr @.str)
-// SIMD-NOT: call void @__kmpc_error(ptr @1, i32 2, ptr @.str)
-// CHECK: call void @__kmpc_error(ptr @3, i32 1, ptr @.str.1)
-// SIMD-NOT: call void @__kmpc_error(ptr @3, i32 1, ptr @.str.1)
-//
+
+
+
int main (int argc, char **argv) {
int b = argc, c, d, e, f, g;
static int a;
+ const char str1[] = "msg";
+ const char *str2 = "msg";
+ char str3[] = "msg";
+ char *str4 = str3;
+ char * const str5 = str3;
#pragma omp error at(execution) severity(fatal) message("GPU compiler required.")
+#pragma omp error at(execution) severity(fatal) message(str1)
+#pragma omp error at(execution) severity(fatal) message(str2)
+#pragma omp error at(execution) severity(fatal) message(str3)
+#pragma omp error at(execution) severity(fatal) message(str4)
+#pragma omp error at(execution) severity(fatal) message(str5)
a=2;
#pragma omp error at(execution) severity(warning) message("Note this is functioncall.")
+#pragma omp error at(execution) severity(warning) message(str1)
+#pragma omp error at(execution) severity(warning) message(str2)
+#pragma omp error at(execution) severity(warning) message(str3)
+#pragma omp error at(execution) severity(warning) message(str4)
+#pragma omp error at(execution) severity(warning) message(str5)
foo();
tmain<int, 10>(argc, argv);
}
-//CHECK-LABEL: @_Z5tmainIiLi10EEiT_PPc(
-//SIMD-LABEL: @_Z5tmainIiLi10EEiT_PPc(
-//CHECK: call void @__kmpc_error(ptr @5, i32 2, ptr @.str.2)
-//CHECK: call void @__kmpc_error(ptr @7, i32 1, ptr @.str.3)
-//CHECK: call void @__kmpc_error(ptr @9, i32 2, ptr @.str)
-//SIMD-NOT: call void @__kmpc_error(ptr @5, i32 2, ptr @.str.2)
-//SIMD-NOT: call void @__kmpc_error(ptr @7, i32 1, ptr @.str.3)
-//SIMD-NOT: call void @__kmpc_error(ptr @9, i32 2, ptr @.str)
-//CHECK: ret i32 10
+// CHECK-LABEL: define dso_local void @_Z3foov(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret void
+// CHECK-LABEL: define dso_local noundef i32 @main(
+// CHECK-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[D:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[E:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[F:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[G:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1
+// CHECK-NEXT: [[STR2:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1
+// CHECK-NEXT: [[STR4:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[STR5:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
+// CHECK-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP0]], ptr [[B]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const.main.str1, i64 4, i1 false)
+// CHECK-NEXT: store ptr @.str, ptr [[STR2]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const.main.str3, i64 4, i1 false)
+// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8
+// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// CHECK-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.str.1)
+// CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB3:[0-9]+]], i32 2, ptr [[ARRAYDECAY2]])
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[STR2]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB5:[0-9]+]], i32 2, ptr [[TMP1]])
+// CHECK-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB7:[0-9]+]], i32 2, ptr [[ARRAYDECAY3]])
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[STR4]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB9:[0-9]+]], i32 2, ptr [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[STR5]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB11:[0-9]+]], i32 2, ptr [[TMP3]])
+// CHECK-NEXT: store i32 2, ptr @_ZZ4mainE1a, align 4
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB13:[0-9]+]], i32 1, ptr @.str.2)
+// CHECK-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB15:[0-9]+]], i32 1, ptr [[ARRAYDECAY4]])
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[STR2]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB17:[0-9]+]], i32 1, ptr [[TMP4]])
+// CHECK-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB19:[0-9]+]], i32 1, ptr [[ARRAYDECAY5]])
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[STR4]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB21:[0-9]+]], i32 1, ptr [[TMP5]])
+// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[STR5]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB23:[0-9]+]], i32 1, ptr [[TMP6]])
+// CHECK-NEXT: call void @_Z3foov()
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_PPc(i32 noundef [[TMP7]], ptr noundef [[TMP8]])
+// CHECK-NEXT: ret i32 0
+// CHECK-LABEL: define linkonce_odr noundef i32 @_Z5tmainIiLi10EEiT_PPc(
+// CHECK-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR0]] comdat {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[D:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[E:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[F:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[G:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1
+// CHECK-NEXT: [[STR2:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1
+// CHECK-NEXT: [[STR4:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[STR5:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[B7:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[C8:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
+// CHECK-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP0]], ptr [[B]], align 4
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str1, i64 4, i1 false)
+// CHECK-NEXT: store ptr @.str, ptr [[STR2]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str3, i64 4, i1 false)
+// CHECK-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// CHECK-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8
+// CHECK-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// CHECK-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB25:[0-9]+]], i32 2, ptr @.str.3)
+// CHECK-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB27:[0-9]+]], i32 2, ptr [[ARRAYDECAY2]])
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[STR2]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB29:[0-9]+]], i32 2, ptr [[TMP1]])
+// CHECK-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB31:[0-9]+]], i32 2, ptr [[ARRAYDECAY3]])
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[STR4]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB33:[0-9]+]], i32 2, ptr [[TMP2]])
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[STR5]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB35:[0-9]+]], i32 2, ptr [[TMP3]])
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+// CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0
+// CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
+// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32
+// CHECK-NEXT: store i32 [[CONV]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4
+// CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK-NEXT: store i32 [[INC]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB37:[0-9]+]], i32 1, ptr @.str.4)
+// CHECK-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB39:[0-9]+]], i32 1, ptr [[ARRAYDECAY5]])
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[STR2]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB41:[0-9]+]], i32 1, ptr [[TMP8]])
+// CHECK-NEXT: [[ARRAYDECAY6:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB43:[0-9]+]], i32 1, ptr [[ARRAYDECAY6]])
+// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[STR4]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB45:[0-9]+]], i32 1, ptr [[TMP9]])
+// CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[STR5]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB47:[0-9]+]], i32 1, ptr [[TMP10]])
+// CHECK-NEXT: store i32 10, ptr [[B7]], align 4
+// CHECK-NEXT: store i32 100, ptr [[C8]], align 4
+// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[B7]], align 4
+// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[C8]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// CHECK-NEXT: store i32 [[ADD]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB49:[0-9]+]], i32 2, ptr @.str.1)
+// CHECK-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB51:[0-9]+]], i32 2, ptr [[ARRAYDECAY9]])
+// CHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[STR2]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB53:[0-9]+]], i32 2, ptr [[TMP13]])
+// CHECK-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB55:[0-9]+]], i32 2, ptr [[ARRAYDECAY10]])
+// CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[STR4]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB57:[0-9]+]], i32 2, ptr [[TMP14]])
+// CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[STR5]], align 8
+// CHECK-NEXT: call void @__kmpc_error(ptr @[[GLOB59:[0-9]+]], i32 2, ptr [[TMP15]])
+// CHECK-NEXT: call void @_Z3foov()
+// CHECK-NEXT: ret i32 10
+// OMP51-LABEL: define dso_local void @_Z3foov(
+// OMP51-SAME: ) #[[ATTR0:[0-9]+]] {
+// OMP51-NEXT: [[ENTRY:.*:]]
+// OMP51-NEXT: ret void
+//
+//
+// OMP51-LABEL: define dso_local noundef i32 @main(
+// OMP51-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] {
+// OMP51-NEXT: [[ENTRY:.*:]]
+// OMP51-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
+// OMP51-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8
+// OMP51-NEXT: [[B:%.*]] = alloca i32, align 4
+// OMP51-NEXT: [[C:%.*]] = alloca i32, align 4
+// OMP51-NEXT: [[D:%.*]] = alloca i32, align 4
+// OMP51-NEXT: [[E:%.*]] = alloca i32, align 4
+// OMP51-NEXT: [[F:%.*]] = alloca i32, align 4
+// OMP51-NEXT: [[G:%.*]] = alloca i32, align 4
+// OMP51-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1
+// OMP51-NEXT: [[STR2:%.*]] = alloca ptr, align 8
+// OMP51-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1
+// OMP51-NEXT: [[STR4:%.*]] = alloca ptr, align 8
+// OMP51-NEXT: [[STR5:%.*]] = alloca ptr, align 8
+// OMP51-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
+// OMP51-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
+// OMP51-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// OMP51-NEXT: store i32 [[TMP0]], ptr [[B]], align 4
+// OMP51-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const.main.str1, i64 4, i1 false)
+// OMP51-NEXT: store ptr @.str, ptr [[STR2]], align 8
+// OMP51-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const.main.str3, i64 4, i1 false)
+// OMP51-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP51-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8
+// OMP51-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP51-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.str.1)
+// OMP51-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB3:[0-9]+]], i32 2, ptr [[ARRAYDECAY2]])
+// OMP51-NEXT: [[TMP1:%.*]] = load ptr, ptr [[STR2]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB5:[0-9]+]], i32 2, ptr [[TMP1]])
+// OMP51-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB7:[0-9]+]], i32 2, ptr [[ARRAYDECAY3]])
+// OMP51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[STR4]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB9:[0-9]+]], i32 2, ptr [[TMP2]])
+// OMP51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[STR5]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB11:[0-9]+]], i32 2, ptr [[TMP3]])
+// OMP51-NEXT: store i32 2, ptr @_ZZ4mainE1a, align 4
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB13:[0-9]+]], i32 1, ptr @.str.2)
+// OMP51-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB15:[0-9]+]], i32 1, ptr [[ARRAYDECAY4]])
+// OMP51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[STR2]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB17:[0-9]+]], i32 1, ptr [[TMP4]])
+// OMP51-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB19:[0-9]+]], i32 1, ptr [[ARRAYDECAY5]])
+// OMP51-NEXT: [[TMP5:%.*]] = load ptr, ptr [[STR4]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB21:[0-9]+]], i32 1, ptr [[TMP5]])
+// OMP51-NEXT: [[TMP6:%.*]] = load ptr, ptr [[STR5]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB23:[0-9]+]], i32 1, ptr [[TMP6]])
+// OMP51-NEXT: call void @_Z3foov()
+// OMP51-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// OMP51-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// OMP51-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_PPc(i32 noundef [[TMP7]], ptr noundef [[TMP8]])
+// OMP51-NEXT: ret i32 0
+//
+//
+// OMP51-LABEL: define linkonce_odr noundef i32 @_Z5tmainIiLi10EEiT_PPc(
+// OMP51-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR0]] comdat {
+// OMP51-NEXT: [[ENTRY:.*:]]
+// OMP51-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
+// OMP51-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8
+// OMP51-NEXT: [[B:%.*]] = alloca i32, align 4
+// OMP51-NEXT: [[C:%.*]] = alloca i32, align 4
+// OMP51-NEXT: [[D:%.*]] = alloca i32, align 4
+// OMP51-NEXT: [[E:%.*]] = alloca i32, align 4
+// OMP51-NEXT: [[F:%.*]] = alloca i32, align 4
+// OMP51-NEXT: [[G:%.*]] = alloca i32, align 4
+// OMP51-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1
+// OMP51-NEXT: [[STR2:%.*]] = alloca ptr, align 8
+// OMP51-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1
+// OMP51-NEXT: [[STR4:%.*]] = alloca ptr, align 8
+// OMP51-NEXT: [[STR5:%.*]] = alloca ptr, align 8
+// OMP51-NEXT: [[B7:%.*]] = alloca i32, align 4
+// OMP51-NEXT: [[C8:%.*]] = alloca i32, align 4
+// OMP51-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
+// OMP51-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
+// OMP51-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// OMP51-NEXT: store i32 [[TMP0]], ptr [[B]], align 4
+// OMP51-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str1, i64 4, i1 false)
+// OMP51-NEXT: store ptr @.str, ptr [[STR2]], align 8
+// OMP51-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str3, i64 4, i1 false)
+// OMP51-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP51-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8
+// OMP51-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP51-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB25:[0-9]+]], i32 2, ptr @.str.3)
+// OMP51-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB27:[0-9]+]], i32 2, ptr [[ARRAYDECAY2]])
+// OMP51-NEXT: [[TMP1:%.*]] = load ptr, ptr [[STR2]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB29:[0-9]+]], i32 2, ptr [[TMP1]])
+// OMP51-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB31:[0-9]+]], i32 2, ptr [[ARRAYDECAY3]])
+// OMP51-NEXT: [[TMP2:%.*]] = load ptr, ptr [[STR4]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB33:[0-9]+]], i32 2, ptr [[TMP2]])
+// OMP51-NEXT: [[TMP3:%.*]] = load ptr, ptr [[STR5]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB35:[0-9]+]], i32 2, ptr [[TMP3]])
+// OMP51-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// OMP51-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0
+// OMP51-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+// OMP51-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0
+// OMP51-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
+// OMP51-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32
+// OMP51-NEXT: store i32 [[CONV]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4
+// OMP51-NEXT: [[TMP7:%.*]] = load i32, ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4
+// OMP51-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// OMP51-NEXT: store i32 [[INC]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB37:[0-9]+]], i32 1, ptr @.str.4)
+// OMP51-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB39:[0-9]+]], i32 1, ptr [[ARRAYDECAY5]])
+// OMP51-NEXT: [[TMP8:%.*]] = load ptr, ptr [[STR2]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB41:[0-9]+]], i32 1, ptr [[TMP8]])
+// OMP51-NEXT: [[ARRAYDECAY6:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB43:[0-9]+]], i32 1, ptr [[ARRAYDECAY6]])
+// OMP51-NEXT: [[TMP9:%.*]] = load ptr, ptr [[STR4]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB45:[0-9]+]], i32 1, ptr [[TMP9]])
+// OMP51-NEXT: [[TMP10:%.*]] = load ptr, ptr [[STR5]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB47:[0-9]+]], i32 1, ptr [[TMP10]])
+// OMP51-NEXT: store i32 10, ptr [[B7]], align 4
+// OMP51-NEXT: store i32 100, ptr [[C8]], align 4
+// OMP51-NEXT: [[TMP11:%.*]] = load i32, ptr [[B7]], align 4
+// OMP51-NEXT: [[TMP12:%.*]] = load i32, ptr [[C8]], align 4
+// OMP51-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// OMP51-NEXT: store i32 [[ADD]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB49:[0-9]+]], i32 2, ptr @.str.1)
+// OMP51-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB51:[0-9]+]], i32 2, ptr [[ARRAYDECAY9]])
+// OMP51-NEXT: [[TMP13:%.*]] = load ptr, ptr [[STR2]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB53:[0-9]+]], i32 2, ptr [[TMP13]])
+// OMP51-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB55:[0-9]+]], i32 2, ptr [[ARRAYDECAY10]])
+// OMP51-NEXT: [[TMP14:%.*]] = load ptr, ptr [[STR4]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB57:[0-9]+]], i32 2, ptr [[TMP14]])
+// OMP51-NEXT: [[TMP15:%.*]] = load ptr, ptr [[STR5]], align 8
+// OMP51-NEXT: call void @__kmpc_error(ptr @[[GLOB59:[0-9]+]], i32 2, ptr [[TMP15]])
+// OMP51-NEXT: call void @_Z3foov()
+// OMP51-NEXT: ret i32 10
+//
+//
+// OMP60-LABEL: define dso_local void @_Z3foov(
+// OMP60-SAME: ) #[[ATTR0:[0-9]+]] {
+// OMP60-NEXT: [[ENTRY:.*:]]
+// OMP60-NEXT: ret void
+//
+//
+// OMP60-LABEL: define dso_local noundef i32 @main(
+// OMP60-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] {
+// OMP60-NEXT: [[ENTRY:.*:]]
+// OMP60-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
+// OMP60-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8
+// OMP60-NEXT: [[B:%.*]] = alloca i32, align 4
+// OMP60-NEXT: [[C:%.*]] = alloca i32, align 4
+// OMP60-NEXT: [[D:%.*]] = alloca i32, align 4
+// OMP60-NEXT: [[E:%.*]] = alloca i32, align 4
+// OMP60-NEXT: [[F:%.*]] = alloca i32, align 4
+// OMP60-NEXT: [[G:%.*]] = alloca i32, align 4
+// OMP60-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1
+// OMP60-NEXT: [[STR2:%.*]] = alloca ptr, align 8
+// OMP60-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1
+// OMP60-NEXT: [[STR4:%.*]] = alloca ptr, align 8
+// OMP60-NEXT: [[STR5:%.*]] = alloca ptr, align 8
+// OMP60-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
+// OMP60-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
+// OMP60-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// OMP60-NEXT: store i32 [[TMP0]], ptr [[B]], align 4
+// OMP60-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const.main.str1, i64 4, i1 false)
+// OMP60-NEXT: store ptr @.str, ptr [[STR2]], align 8
+// OMP60-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const.main.str3, i64 4, i1 false)
+// OMP60-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP60-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8
+// OMP60-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP60-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.str.1)
+// OMP60-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB3:[0-9]+]], i32 2, ptr [[ARRAYDECAY2]])
+// OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr [[STR2]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB5:[0-9]+]], i32 2, ptr [[TMP1]])
+// OMP60-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB7:[0-9]+]], i32 2, ptr [[ARRAYDECAY3]])
+// OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[STR4]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB9:[0-9]+]], i32 2, ptr [[TMP2]])
+// OMP60-NEXT: [[TMP3:%.*]] = load ptr, ptr [[STR5]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB11:[0-9]+]], i32 2, ptr [[TMP3]])
+// OMP60-NEXT: store i32 2, ptr @_ZZ4mainE1a, align 4
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB13:[0-9]+]], i32 1, ptr @.str.2)
+// OMP60-NEXT: [[ARRAYDECAY4:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB15:[0-9]+]], i32 1, ptr [[ARRAYDECAY4]])
+// OMP60-NEXT: [[TMP4:%.*]] = load ptr, ptr [[STR2]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB17:[0-9]+]], i32 1, ptr [[TMP4]])
+// OMP60-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB19:[0-9]+]], i32 1, ptr [[ARRAYDECAY5]])
+// OMP60-NEXT: [[TMP5:%.*]] = load ptr, ptr [[STR4]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB21:[0-9]+]], i32 1, ptr [[TMP5]])
+// OMP60-NEXT: [[TMP6:%.*]] = load ptr, ptr [[STR5]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB23:[0-9]+]], i32 1, ptr [[TMP6]])
+// OMP60-NEXT: call void @_Z3foov()
+// OMP60-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// OMP60-NEXT: [[TMP8:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// OMP60-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_PPc(i32 noundef [[TMP7]], ptr noundef [[TMP8]])
+// OMP60-NEXT: ret i32 0
+//
+//
+// OMP60-LABEL: define linkonce_odr noundef i32 @_Z5tmainIiLi10EEiT_PPc(
+// OMP60-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR0]] comdat {
+// OMP60-NEXT: [[ENTRY:.*:]]
+// OMP60-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
+// OMP60-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8
+// OMP60-NEXT: [[B:%.*]] = alloca i32, align 4
+// OMP60-NEXT: [[C:%.*]] = alloca i32, align 4
+// OMP60-NEXT: [[D:%.*]] = alloca i32, align 4
+// OMP60-NEXT: [[E:%.*]] = alloca i32, align 4
+// OMP60-NEXT: [[F:%.*]] = alloca i32, align 4
+// OMP60-NEXT: [[G:%.*]] = alloca i32, align 4
+// OMP60-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1
+// OMP60-NEXT: [[STR2:%.*]] = alloca ptr, align 8
+// OMP60-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1
+// OMP60-NEXT: [[STR4:%.*]] = alloca ptr, align 8
+// OMP60-NEXT: [[STR5:%.*]] = alloca ptr, align 8
+// OMP60-NEXT: [[B7:%.*]] = alloca i32, align 4
+// OMP60-NEXT: [[C8:%.*]] = alloca i32, align 4
+// OMP60-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
+// OMP60-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
+// OMP60-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// OMP60-NEXT: store i32 [[TMP0]], ptr [[B]], align 4
+// OMP60-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str1, i64 4, i1 false)
+// OMP60-NEXT: store ptr @.str, ptr [[STR2]], align 8
+// OMP60-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str3, i64 4, i1 false)
+// OMP60-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP60-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8
+// OMP60-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP60-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB25:[0-9]+]], i32 2, ptr @.str.3)
+// OMP60-NEXT: [[ARRAYDECAY2:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB27:[0-9]+]], i32 2, ptr [[ARRAYDECAY2]])
+// OMP60-NEXT: [[TMP1:%.*]] = load ptr, ptr [[STR2]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB29:[0-9]+]], i32 2, ptr [[TMP1]])
+// OMP60-NEXT: [[ARRAYDECAY3:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB31:[0-9]+]], i32 2, ptr [[ARRAYDECAY3]])
+// OMP60-NEXT: [[TMP2:%.*]] = load ptr, ptr [[STR4]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB33:[0-9]+]], i32 2, ptr [[TMP2]])
+// OMP60-NEXT: [[TMP3:%.*]] = load ptr, ptr [[STR5]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB35:[0-9]+]], i32 2, ptr [[TMP3]])
+// OMP60-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8
+// OMP60-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP4]], i64 0
+// OMP60-NEXT: [[TMP5:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8
+// OMP60-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 0
+// OMP60-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1
+// OMP60-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32
+// OMP60-NEXT: store i32 [[CONV]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4
+// OMP60-NEXT: [[TMP7:%.*]] = load i32, ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4
+// OMP60-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// OMP60-NEXT: store i32 [[INC]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB37:[0-9]+]], i32 1, ptr @.str.4)
+// OMP60-NEXT: [[ARRAYDECAY5:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB39:[0-9]+]], i32 1, ptr [[ARRAYDECAY5]])
+// OMP60-NEXT: [[TMP8:%.*]] = load ptr, ptr [[STR2]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB41:[0-9]+]], i32 1, ptr [[TMP8]])
+// OMP60-NEXT: [[ARRAYDECAY6:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB43:[0-9]+]], i32 1, ptr [[ARRAYDECAY6]])
+// OMP60-NEXT: [[TMP9:%.*]] = load ptr, ptr [[STR4]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB45:[0-9]+]], i32 1, ptr [[TMP9]])
+// OMP60-NEXT: [[TMP10:%.*]] = load ptr, ptr [[STR5]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB47:[0-9]+]], i32 1, ptr [[TMP10]])
+// OMP60-NEXT: store i32 10, ptr [[B7]], align 4
+// OMP60-NEXT: store i32 100, ptr [[C8]], align 4
+// OMP60-NEXT: [[TMP11:%.*]] = load i32, ptr [[B7]], align 4
+// OMP60-NEXT: [[TMP12:%.*]] = load i32, ptr [[C8]], align 4
+// OMP60-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
+// OMP60-NEXT: store i32 [[ADD]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB49:[0-9]+]], i32 2, ptr @.str.1)
+// OMP60-NEXT: [[ARRAYDECAY9:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR1]], i64 0, i64 0
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB51:[0-9]+]], i32 2, ptr [[ARRAYDECAY9]])
+// OMP60-NEXT: [[TMP13:%.*]] = load ptr, ptr [[STR2]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB53:[0-9]+]], i32 2, ptr [[TMP13]])
+// OMP60-NEXT: [[ARRAYDECAY10:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB55:[0-9]+]], i32 2, ptr [[ARRAYDECAY10]])
+// OMP60-NEXT: [[TMP14:%.*]] = load ptr, ptr [[STR4]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB57:[0-9]+]], i32 2, ptr [[TMP14]])
+// OMP60-NEXT: [[TMP15:%.*]] = load ptr, ptr [[STR5]], align 8
+// OMP60-NEXT: call void @__kmpc_error(ptr @[[GLOB59:[0-9]+]], i32 2, ptr [[TMP15]])
+// OMP60-NEXT: call void @_Z3foov()
+// OMP60-NEXT: ret i32 10
+//
+//
+// SIMD-LABEL: define dso_local void @_Z3foov(
+// SIMD-SAME: ) #[[ATTR0:[0-9]+]] !dbg [[DBG29:![0-9]+]] {
+// SIMD-NEXT: [[ENTRY:.*:]]
+// SIMD-NEXT: ret void, !dbg [[DBG32:![0-9]+]]
+//
+//
+// SIMD-LABEL: define dso_local noundef i32 @main(
+// SIMD-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG2:![0-9]+]] {
+// SIMD-NEXT: [[ENTRY:.*:]]
+// SIMD-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
+// SIMD-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-NEXT: [[B:%.*]] = alloca i32, align 4
+// SIMD-NEXT: [[C:%.*]] = alloca i32, align 4
+// SIMD-NEXT: [[D:%.*]] = alloca i32, align 4
+// SIMD-NEXT: [[E:%.*]] = alloca i32, align 4
+// SIMD-NEXT: [[F:%.*]] = alloca i32, align 4
+// SIMD-NEXT: [[G:%.*]] = alloca i32, align 4
+// SIMD-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1
+// SIMD-NEXT: [[STR2:%.*]] = alloca ptr, align 8
+// SIMD-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1
+// SIMD-NEXT: [[STR4:%.*]] = alloca ptr, align 8
+// SIMD-NEXT: [[STR5:%.*]] = alloca ptr, align 8
+// SIMD-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
+// SIMD-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META33:![0-9]+]], !DIExpression(), [[META34:![0-9]+]])
+// SIMD-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
+// SIMD-NEXT: #dbg_declare(ptr [[ARGV_ADDR]], [[META35:![0-9]+]], !DIExpression(), [[META36:![0-9]+]])
+// SIMD-NEXT: #dbg_declare(ptr [[B]], [[META37:![0-9]+]], !DIExpression(), [[META38:![0-9]+]])
+// SIMD-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !dbg [[DBG39:![0-9]+]]
+// SIMD-NEXT: store i32 [[TMP0]], ptr [[B]], align 4, !dbg [[META38]]
+// SIMD-NEXT: #dbg_declare(ptr [[C]], [[META40:![0-9]+]], !DIExpression(), [[META41:![0-9]+]])
+// SIMD-NEXT: #dbg_declare(ptr [[D]], [[META42:![0-9]+]], !DIExpression(), [[META43:![0-9]+]])
+// SIMD-NEXT: #dbg_declare(ptr [[E]], [[META44:![0-9]+]], !DIExpression(), [[META45:![0-9]+]])
+// SIMD-NEXT: #dbg_declare(ptr [[F]], [[META46:![0-9]+]], !DIExpression(), [[META47:![0-9]+]])
+// SIMD-NEXT: #dbg_declare(ptr [[G]], [[META48:![0-9]+]], !DIExpression(), [[META49:![0-9]+]])
+// SIMD-NEXT: #dbg_declare(ptr [[STR1]], [[META50:![0-9]+]], !DIExpression(), [[META51:![0-9]+]])
+// SIMD-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const.main.str1, i64 4, i1 false), !dbg [[META51]]
+// SIMD-NEXT: #dbg_declare(ptr [[STR2]], [[META52:![0-9]+]], !DIExpression(), [[META54:![0-9]+]])
+// SIMD-NEXT: store ptr @.str, ptr [[STR2]], align 8, !dbg [[META54]]
+// SIMD-NEXT: #dbg_declare(ptr [[STR3]], [[META55:![0-9]+]], !DIExpression(), [[META57:![0-9]+]])
+// SIMD-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const.main.str3, i64 4, i1 false), !dbg [[META57]]
+// SIMD-NEXT: #dbg_declare(ptr [[STR4]], [[META58:![0-9]+]], !DIExpression(), [[META59:![0-9]+]])
+// SIMD-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0, !dbg [[DBG60:![0-9]+]]
+// SIMD-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8, !dbg [[META59]]
+// SIMD-NEXT: #dbg_declare(ptr [[STR5]], [[META61:![0-9]+]], !DIExpression(), [[META63:![0-9]+]])
+// SIMD-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0, !dbg [[DBG64:![0-9]+]]
+// SIMD-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8, !dbg [[META63]]
+// SIMD-NEXT: store i32 2, ptr @_ZZ4mainE1a, align 4, !dbg [[DBG65:![0-9]+]]
+// SIMD-NEXT: call void @_Z3foov(), !dbg [[DBG66:![0-9]+]]
+// SIMD-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !dbg [[DBG67:![0-9]+]]
+// SIMD-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !dbg [[DBG68:![0-9]+]]
+// SIMD-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiLi10EEiT_PPc(i32 noundef [[TMP1]], ptr noundef [[TMP2]]), !dbg [[DBG69:![0-9]+]]
+// SIMD-NEXT: ret i32 0, !dbg [[DBG70:![0-9]+]]
+//
+//
+// SIMD-LABEL: define linkonce_odr noundef i32 @_Z5tmainIiLi10EEiT_PPc(
+// SIMD-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR0]] comdat !dbg [[DBG21:![0-9]+]] {
+// SIMD-NEXT: [[ENTRY:.*:]]
+// SIMD-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
+// SIMD-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-NEXT: [[B:%.*]] = alloca i32, align 4
+// SIMD-NEXT: [[C:%.*]] = alloca i32, align 4
+// SIMD-NEXT: [[D:%.*]] = alloca i32, align 4
+// SIMD-NEXT: [[E:%.*]] = alloca i32, align 4
+// SIMD-NEXT: [[F:%.*]] = alloca i32, align 4
+// SIMD-NEXT: [[G:%.*]] = alloca i32, align 4
+// SIMD-NEXT: [[STR1:%.*]] = alloca [4 x i8], align 1
+// SIMD-NEXT: [[STR2:%.*]] = alloca ptr, align 8
+// SIMD-NEXT: [[STR3:%.*]] = alloca [4 x i8], align 1
+// SIMD-NEXT: [[STR4:%.*]] = alloca ptr, align 8
+// SIMD-NEXT: [[STR5:%.*]] = alloca ptr, align 8
+// SIMD-NEXT: [[B3:%.*]] = alloca i32, align 4
+// SIMD-NEXT: [[C4:%.*]] = alloca i32, align 4
+// SIMD-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
+// SIMD-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META71:![0-9]+]], !DIExpression(), [[META72:![0-9]+]])
+// SIMD-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8
+// SIMD-NEXT: #dbg_declare(ptr [[ARGV_ADDR]], [[META73:![0-9]+]], !DIExpression(), [[META74:![0-9]+]])
+// SIMD-NEXT: #dbg_declare(ptr [[B]], [[META75:![0-9]+]], !DIExpression(), [[META76:![0-9]+]])
+// SIMD-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !dbg [[DBG77:![0-9]+]]
+// SIMD-NEXT: store i32 [[TMP0]], ptr [[B]], align 4, !dbg [[META76]]
+// SIMD-NEXT: #dbg_declare(ptr [[C]], [[META78:![0-9]+]], !DIExpression(), [[META79:![0-9]+]])
+// SIMD-NEXT: #dbg_declare(ptr [[D]], [[META80:![0-9]+]], !DIExpression(), [[META81:![0-9]+]])
+// SIMD-NEXT: #dbg_declare(ptr [[E]], [[META82:![0-9]+]], !DIExpression(), [[META83:![0-9]+]])
+// SIMD-NEXT: #dbg_declare(ptr [[F]], [[META84:![0-9]+]], !DIExpression(), [[META85:![0-9]+]])
+// SIMD-NEXT: #dbg_declare(ptr [[G]], [[META86:![0-9]+]], !DIExpression(), [[META87:![0-9]+]])
+// SIMD-NEXT: #dbg_declare(ptr [[STR1]], [[META88:![0-9]+]], !DIExpression(), [[META89:![0-9]+]])
+// SIMD-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR1]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str1, i64 4, i1 false), !dbg [[META89]]
+// SIMD-NEXT: #dbg_declare(ptr [[STR2]], [[META90:![0-9]+]], !DIExpression(), [[META91:![0-9]+]])
+// SIMD-NEXT: store ptr @.str, ptr [[STR2]], align 8, !dbg [[META91]]
+// SIMD-NEXT: #dbg_declare(ptr [[STR3]], [[META92:![0-9]+]], !DIExpression(), [[META93:![0-9]+]])
+// SIMD-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[STR3]], ptr align 1 @__const._Z5tmainIiLi10EEiT_PPc.str3, i64 4, i1 false), !dbg [[META93]]
+// SIMD-NEXT: #dbg_declare(ptr [[STR4]], [[META94:![0-9]+]], !DIExpression(), [[META95:![0-9]+]])
+// SIMD-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0, !dbg [[DBG96:![0-9]+]]
+// SIMD-NEXT: store ptr [[ARRAYDECAY]], ptr [[STR4]], align 8, !dbg [[META95]]
+// SIMD-NEXT: #dbg_declare(ptr [[STR5]], [[META97:![0-9]+]], !DIExpression(), [[META98:![0-9]+]])
+// SIMD-NEXT: [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i8], ptr [[STR3]], i64 0, i64 0, !dbg [[DBG99:![0-9]+]]
+// SIMD-NEXT: store ptr [[ARRAYDECAY1]], ptr [[STR5]], align 8, !dbg [[META98]]
+// SIMD-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !dbg [[DBG100:![0-9]+]]
+// SIMD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i64 0, !dbg [[DBG100]]
+// SIMD-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !dbg [[DBG100]]
+// SIMD-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 0, !dbg [[DBG100]]
+// SIMD-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1, !dbg [[DBG100]]
+// SIMD-NEXT: [[CONV:%.*]] = sext i8 [[TMP3]] to i32, !dbg [[DBG100]]
+// SIMD-NEXT: store i32 [[CONV]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4, !dbg [[DBG101:![0-9]+]]
+// SIMD-NEXT: [[TMP4:%.*]] = load i32, ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4, !dbg [[DBG102:![0-9]+]]
+// SIMD-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1, !dbg [[DBG102]]
+// SIMD-NEXT: store i32 [[INC]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4, !dbg [[DBG102]]
+// SIMD-NEXT: #dbg_declare(ptr [[B3]], [[META103:![0-9]+]], !DIExpression(), [[META105:![0-9]+]])
+// SIMD-NEXT: store i32 10, ptr [[B3]], align 4, !dbg [[META105]]
+// SIMD-NEXT: #dbg_declare(ptr [[C4]], [[META106:![0-9]+]], !DIExpression(), [[META107:![0-9]+]])
+// SIMD-NEXT: store i32 100, ptr [[C4]], align 4, !dbg [[META107]]
+// SIMD-NEXT: [[TMP5:%.*]] = load i32, ptr [[B3]], align 4, !dbg [[DBG108:![0-9]+]]
+// SIMD-NEXT: [[TMP6:%.*]] = load i32, ptr [[C4]], align 4, !dbg [[DBG109:![0-9]+]]
+// SIMD-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]], !dbg [[DBG110:![0-9]+]]
+// SIMD-NEXT: store i32 [[ADD]], ptr @_ZZ5tmainIiLi10EEiT_PPcE1a, align 4, !dbg [[DBG111:![0-9]+]]
+// SIMD-NEXT: call void @_Z3foov(), !dbg [[DBG112:![0-9]+]]
+// SIMD-NEXT: ret i32 10, !dbg [[DBG113:![0-9]+]]
+//
+//.
+// SIMD: [[META0:![0-9]+]] = !DIGlobalVariableExpression(var: [[META1:![0-9]+]], expr: !DIExpression())
+// SIMD: [[META1]] = distinct !DIGlobalVariable(name: "a", scope: [[DBG2]], file: [[META3:![0-9]+]], line: 61, type: [[META6:![0-9]+]], isLocal: true, isDefinition: true)
+// SIMD: [[DBG2]] = distinct !DISubprogram(name: "main", scope: [[META3]], file: [[META3]], line: 59, type: [[META4:![0-9]+]], scopeLine: 59, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META10:![0-9]+]], retainedNodes: [[META22:![0-9]+]])
+// SIMD: [[META3]] = !DIFile(filename: "{{.*}}error_codegen.cpp", directory: {{.*}})
+// SIMD: [[META4]] = !DISubroutineType(types: [[META5:![0-9]+]])
+// SIMD: [[META5]] = !{[[META6]], [[META6]], [[META7:![0-9]+]]}
+// SIMD: [[META6]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+// SIMD: [[META7]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META8:![0-9]+]], size: 64)
+// SIMD: [[META8]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META9:![0-9]+]], size: 64)
+// SIMD: [[META9]] = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
+// SIMD: [[META10]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_11, file: [[META11:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: [[META12:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
+// SIMD: [[META11]] = !DIFile(filename: "{{.*}}<stdin>", directory: {{.*}})
+// SIMD: [[META12]] = !{[[META0]], [[META13:![0-9]+]], [[META19:![0-9]+]]}
+// SIMD: [[META13]] = !DIGlobalVariableExpression(var: [[META14:![0-9]+]], expr: !DIExpression())
+// SIMD: [[META14]] = distinct !DIGlobalVariable(scope: null, file: [[META3]], line: 63, type: [[META15:![0-9]+]], isLocal: true, isDefinition: true)
+// SIMD: [[META15]] = !DICompositeType(tag: DW_TAG_array_type, baseType: [[META16:![0-9]+]], size: 32, elements: [[META17:![0-9]+]])
+// SIMD: [[META16]] = !DIDerivedType(tag: DW_TAG_const_type, baseType: [[META9]])
+// SIMD: [[META17]] = !{[[META18:![0-9]+]]}
+// SIMD: [[META18]] = !DISubrange(count: 4)
+// SIMD: [[META19]] = !DIGlobalVariableExpression(var: [[META20:![0-9]+]], expr: !DIExpression())
+// SIMD: [[META20]] = distinct !DIGlobalVariable(name: "a", scope: [[DBG21]], file: [[META3]], line: 22, type: [[META6]], isLocal: false, isDefinition: true)
+// SIMD: [[DBG21]] = distinct !DISubprogram(name: "tmain<int, 10>", linkageName: "_Z5tmainIiLi10EEiT_PPc", scope: [[META3]], file: [[META3]], line: 20, type: [[META4]], scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META10]], templateParams: [[META23:![0-9]+]], retainedNodes: [[META22]])
+// SIMD: [[META22]] = !{}
+// SIMD: [[META23]] = !{[[META24:![0-9]+]], [[META25:![0-9]+]]}
+// SIMD: [[META24]] = !DITemplateTypeParameter(name: "T", type: [[META6]])
+// SIMD: [[META25]] = !DITemplateValueParameter(name: "N", type: [[META6]], value: i32 10)
+// SIMD: [[DBG29]] = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: [[META3]], file: [[META3]], line: 17, type: [[META30:![0-9]+]], scopeLine: 17, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META10]])
+// SIMD: [[META30]] = !DISubroutineType(types: [[META31:![0-9]+]])
+// SIMD: [[META31]] = !{null}
+// SIMD: [[DBG32]] = !DILocation(line: 17, column: 13, scope: [[DBG29]])
+// SIMD: [[META33]] = !DILocalVariable(name: "argc", arg: 1, scope: [[DBG2]], file: [[META3]], line: 59, type: [[META6]])
+// SIMD: [[META34]] = !DILocation(line: 59, column: 15, scope: [[DBG2]])
+// SIMD: [[META35]] = !DILocalVariable(name: "argv", arg: 2, scope: [[DBG2]], file: [[META3]], line: 59, type: [[META7]])
+// SIMD: [[META36]] = !DILocation(line: 59, column: 28, scope: [[DBG2]])
+// SIMD: [[META37]] = !DILocalVariable(name: "b", scope: [[DBG2]], file: [[META3]], line: 60, type: [[META6]])
+// SIMD: [[META38]] = !DILocation(line: 60, column: 7, scope: [[DBG2]])
+// SIMD: [[DBG39]] = !DILocation(line: 60, column: 11, scope: [[DBG2]])
+// SIMD: [[META40]] = !DILocalVariable(name: "c", scope: [[DBG2]], file: [[META3]], line: 60, type: [[META6]])
+// SIMD: [[META41]] = !DILocation(line: 60, column: 17, scope: [[DBG2]])
+// SIMD: [[META42]] = !DILocalVariable(name: "d", scope: [[DBG2]], file: [[META3]], line: 60, type: [[META6]])
+// SIMD: [[META43]] = !DILocation(line: 60, column: 20, scope: [[DBG2]])
+// SIMD: [[META44]] = !DILocalVariable(name: "e", scope: [[DBG2]], file: [[META3]], line: 60, type: [[META6]])
+// SIMD: [[META45]] = !DILocation(line: 60, column: 23, scope: [[DBG2]])
+// SIMD: [[META46]] = !DILocalVariable(name: "f", scope: [[DBG2]], file: [[META3]], line: 60, type: [[META6]])
+// SIMD: [[META47]] = !DILocation(line: 60, column: 26, scope: [[DBG2]])
+// SIMD: [[META48]] = !DILocalVariable(name: "g", scope: [[DBG2]], file: [[META3]], line: 60, type: [[META6]])
+// SIMD: [[META49]] = !DILocation(line: 60, column: 29, scope: [[DBG2]])
+// SIMD: [[META50]] = !DILocalVariable(name: "str1", scope: [[DBG2]], file: [[META3]], line: 62, type: [[META15]])
+// SIMD: [[META51]] = !DILocation(line: 62, column: 14, scope: [[DBG2]])
+// SIMD: [[META52]] = !DILocalVariable(name: "str2", scope: [[DBG2]], file: [[META3]], line: 63, type: [[META53:![0-9]+]])
+// SIMD: [[META53]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META16]], size: 64)
+// SIMD: [[META54]] = !DILocation(line: 63, column: 15, scope: [[DBG2]])
+// SIMD: [[META55]] = !DILocalVariable(name: "str3", scope: [[DBG2]], file: [[META3]], line: 64, type: [[META56:![0-9]+]])
+// SIMD: [[META56]] = !DICompositeType(tag: DW_TAG_array_type, baseType: [[META9]], size: 32, elements: [[META17]])
+// SIMD: [[META57]] = !DILocation(line: 64, column: 8, scope: [[DBG2]])
+// SIMD: [[META58]] = !DILocalVariable(name: "str4", scope: [[DBG2]], file: [[META3]], line: 65, type: [[META8]])
+// SIMD: [[META59]] = !DILocation(line: 65, column: 9, scope: [[DBG2]])
+// SIMD: [[DBG60]] = !DILocation(line: 65, column: 16, scope: [[DBG2]])
+// SIMD: [[META61]] = !DILocalVariable(name: "str5", scope: [[DBG2]], file: [[META3]], line: 66, type: [[META62:![0-9]+]])
+// SIMD: [[META62]] = !DIDerivedType(tag: DW_TAG_const_type, baseType: [[META8]])
+// SIMD: [[META63]] = !DILocation(line: 66, column: 16, scope: [[DBG2]])
+// SIMD: [[DBG64]] = !DILocation(line: 66, column: 23, scope: [[DBG2]])
+// SIMD: [[DBG65]] = !DILocation(line: 73, column: 5, scope: [[DBG2]])
+// SIMD: [[DBG66]] = !DILocation(line: 80, column: 3, scope: [[DBG2]])
+// SIMD: [[DBG67]] = !DILocation(line: 81, column: 18, scope: [[DBG2]])
+// SIMD: [[DBG68]] = !DILocation(line: 81, column: 24, scope: [[DBG2]])
+// SIMD: [[DBG69]] = !DILocation(line: 81, column: 3, scope: [[DBG2]])
+// SIMD: [[DBG70]] = !DILocation(line: 82, column: 1, scope: [[DBG2]])
+// SIMD: [[META71]] = !DILocalVariable(name: "argc", arg: 1, scope: [[DBG21]], file: [[META3]], line: 20, type: [[META6]])
+// SIMD: [[META72]] = !DILocation(line: 20, column: 13, scope: [[DBG21]])
+// SIMD: [[META73]] = !DILocalVariable(name: "argv", arg: 2, scope: [[DBG21]], file: [[META3]], line: 20, type: [[META7]])
+// SIMD: [[META74]] = !DILocation(line: 20, column: 26, scope: [[DBG21]])
+// SIMD: [[META75]] = !DILocalVariable(name: "b", scope: [[DBG21]], file: [[META3]], line: 21, type: [[META6]])
+// SIMD: [[META76]] = !DILocation(line: 21, column: 5, scope: [[DBG21]])
+// SIMD: [[DBG77]] = !DILocation(line: 21, column: 9, scope: [[DBG21]])
+// SIMD: [[META78]] = !DILocalVariable(name: "c", scope: [[DBG21]], file: [[META3]], line: 21, type: [[META6]])
+// SIMD: [[META79]] = !DILocation(line: 21, column: 15, scope: [[DBG21]])
+// SIMD: [[META80]] = !DILocalVariable(name: "d", scope: [[DBG21]], file: [[META3]], line: 21, type: [[META6]])
+// SIMD: [[META81]] = !DILocation(line: 21, column: 18, scope: [[DBG21]])
+// SIMD: [[META82]] = !DILocalVariable(name: "e", scope: [[DBG21]], file: [[META3]], line: 21, type: [[META6]])
+// SIMD: [[META83]] = !DILocation(line: 21, column: 21, scope: [[DBG21]])
+// SIMD: [[META84]] = !DILocalVariable(name: "f", scope: [[DBG21]], file: [[META3]], line: 21, type: [[META6]])
+// SIMD: [[META85]] = !DILocation(line: 21, column: 24, scope: [[DBG21]])
+// SIMD: [[META86]] = !DILocalVariable(name: "g", scope: [[DBG21]], file: [[META3]], line: 21, type: [[META6]])
+// SIMD: [[META87]] = !DILocation(line: 21, column: 27, scope: [[DBG21]])
+// SIMD: [[META88]] = !DILocalVariable(name: "str1", scope: [[DBG21]], file: [[META3]], line: 23, type: [[META15]])
+// SIMD: [[META89]] = !DILocation(line: 23, column: 14, scope: [[DBG21]])
+// SIMD: [[META90]] = !DILocalVariable(name: "str2", scope: [[DBG21]], file: [[META3]], line: 24, type: [[META53]])
+// SIMD: [[META91]] = !DILocation(line: 24, column: 15, scope: [[DBG21]])
+// SIMD: [[META92]] = !DILocalVariable(name: "str3", scope: [[DBG21]], file: [[META3]], line: 25, type: [[META56]])
+// SIMD: [[META93]] = !DILocation(line: 25, column: 8, scope: [[DBG21]])
+// SIMD: [[META94]] = !DILocalVariable(name: "str4", scope: [[DBG21]], file: [[META3]], line: 26, type: [[META8]])
+// SIMD: [[META95]] = !DILocation(line: 26, column: 9, scope: [[DBG21]])
+// SIMD: [[DBG96]] = !DILocation(line: 26, column: 16, scope: [[DBG21]])
+// SIMD: [[META97]] = !DILocalVariable(name: "str5", scope: [[DBG21]], file: [[META3]], line: 27, type: [[META62]])
+// SIMD: [[META98]] = !DILocation(line: 27, column: 16, scope: [[DBG21]])
+// SIMD: [[DBG99]] = !DILocation(line: 27, column: 23, scope: [[DBG21]])
+// SIMD: [[DBG100]] = !DILocation(line: 34, column: 7, scope: [[DBG21]])
+// SIMD: [[DBG101]] = !DILocation(line: 34, column: 5, scope: [[DBG21]])
+// SIMD: [[DBG102]] = !DILocation(line: 35, column: 3, scope: [[DBG21]])
+// SIMD: [[META103]] = !DILocalVariable(name: "b", scope: [[META104:![0-9]+]], file: [[META3]], line: 43, type: [[META6]])
+// SIMD: [[META104]] = distinct !DILexicalBlock(scope: [[DBG21]], file: [[META3]], line: 42, column: 3)
+// SIMD: [[META105]] = !DILocation(line: 43, column: 9, scope: [[META104]])
+// SIMD: [[META106]] = !DILocalVariable(name: "c", scope: [[META104]], file: [[META3]], line: 44, type: [[META6]])
+// SIMD: [[META107]] = !DILocation(line: 44, column: 7, scope: [[META104]])
+// SIMD: [[DBG108]] = !DILocation(line: 45, column: 9, scope: [[META104]])
+// SIMD: [[DBG109]] = !DILocation(line: 45, column: 13, scope: [[META104]])
+// SIMD: [[DBG110]] = !DILocation(line: 45, column: 11, scope: [[META104]])
+// SIMD: [[DBG111]] = !DILocation(line: 45, column: 7, scope: [[META104]])
+// SIMD: [[DBG112]] = !DILocation(line: 53, column: 3, scope: [[DBG21]])
+// SIMD: [[DBG113]] = !DILocation(line: 54, column: 1, scope: [[DBG21]])
+//.
diff --git a/clang/test/OpenMP/error_message.cpp b/clang/test/OpenMP/error_message.cpp
index aed2df99ea8f6..6e64ee2ad4a67 100644
--- a/clang/test/OpenMP/error_message.cpp
+++ b/clang/test/OpenMP/error_message.cpp
@@ -112,8 +112,12 @@ if (1)
// expected-error@+1 {{GPU compiler is needed.}}
#pragma omp error message("GPU compiler is needed.") message("GPU compiler is needed.") // expected-error {{directive '#pragma omp error' cannot contain more than one 'message' clause}}
int a;
-// expected-warning@+1 {{expected string literal in 'clause message' - ignoring}}
+// expected-warning@+1 {{expected string in 'clause message' - ignoring}}
#pragma omp error message(a) // expected-error {{ERROR}}
+ char str[] = "msg";
+// expected-warning@+1 {{expected string literal in 'clause message' - ignoring}}
+#pragma omp error message(str) // expected-error {{ERROR}}
+#pragma omp error at(execution) message(str) // no error
// expected-error@+1 {{ERROR}}
#pragma omp error message() // expected-error {{expected expression}}
return T();
diff --git a/clang/test/OpenMP/parallel_message_messages.cpp b/clang/test/OpenMP/parallel_message_messages.cpp
index 470fadc032280..ea8fa23ef5e53 100644
--- a/clang/test/OpenMP/parallel_message_messages.cpp
+++ b/clang/test/OpenMP/parallel_message_messages.cpp
@@ -8,6 +8,9 @@ T tmain(T argc, S **argv) {
// Correct usage
#pragma omp parallel message("correct message")
+ // Template parameter is not yet instantiated.
+ #pragma omp parallel message(argv[0]) // expected-warning {{expected string in 'clause message' - ignoring}}
+
// Missing parentheses
#pragma omp parallel message // expected-error {{expected '(' after 'message'}}
@@ -15,9 +18,8 @@ T tmain(T argc, S **argv) {
#pragma omp parallel message() // expected-error {{expected expression}}
// Non-string literal
- #pragma omp parallel message(123) // expected-warning {{expected string literal in 'clause message' - ignoring}}
- #pragma omp parallel message(argc) // expected-warning {{expected string literal in 'clause message' - ignoring}}
- #pragma omp parallel message(argv[0]) // expected-warning {{expected string literal in 'clause message' - ignoring}}
+ #pragma omp parallel message(123) // expected-warning {{expected string in 'clause message' - ignoring}}
+ #pragma omp parallel message(argc) // expected-warning {{expected string in 'clause message' - ignoring}}
// Multiple arguments
#pragma omp parallel message("msg1", "msg2") // expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -47,10 +49,10 @@ T tmain(T argc, S **argv) {
// Message clause with macro that is not a string
#define NOT_A_STRING 123
- #pragma omp parallel message(NOT_A_STRING) // expected-warning {{expected string literal in 'clause message' - ignoring}}
+ #pragma omp parallel message(NOT_A_STRING) // expected-warning {{expected string in 'clause message' - ignoring}}
// Message clause with template parameter that is not a string
- #pragma omp parallel message(N) // expected-warning {{expected string literal in 'clause message' - ignoring}}
+ #pragma omp parallel message(N) // expected-warning {{expected string in 'clause message' - ignoring}}
// Message clause with macro that is a string
#define A_STRING "macro string"
@@ -73,15 +75,29 @@ T tmain(T argc, S **argv) {
return argc;
}
+template<int N> int tmain(int argc, char **argv);
+
int main(int argc, char **argv) {
+ const char str1[] = "msg";
+ const char *str2 = "msg";
+ char str3[] = "msg";
+ char *str4 = str3;
+ char * const str5 = str3;
+
// Correct usage
#pragma omp parallel message("main correct")
+ #pragma omp parallel message(argv[0])
+ #pragma omp parallel message(str1)
+ #pragma omp parallel message(str2)
+ #pragma omp parallel message(str3)
+ #pragma omp parallel message(str4)
+ #pragma omp parallel message(str5)
// Invalid: missing string
#pragma omp parallel message() // expected-error {{expression}}
// Invalid: non-string
- #pragma omp parallel message(argc) // expected-warning {{expected string literal in 'clause message' - ignoring}}
+ #pragma omp parallel message(argc) // expected-warning {{expected string in 'clause message' - ignoring}}
foo();
diff --git a/clang/test/OpenMP/parallel_num_threads_codegen.cpp b/clang/test/OpenMP/parallel_num_threads_codegen.cpp
index 2fb9589ab1ab6..9b5d02cd41678 100644
--- a/clang/test/OpenMP/parallel_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/parallel_num_threads_codegen.cpp
@@ -42,9 +42,11 @@ int tmain() {
#pragma omp parallel num_threads(T(23))
foo();
#ifdef OMP60
-#pragma omp parallel num_threads(strict: C)
+ char str[] = "msg";
+ const char *str1 = "msg1";
+#pragma omp parallel num_threads(strict: C) severity(fatal) message(str)
foo();
-#pragma omp parallel num_threads(strict: T(23))
+#pragma omp parallel num_threads(strict: T(23)) severity(warning) message(str1)
foo();
#endif
return 0;
@@ -58,9 +60,11 @@ int main() {
#pragma omp parallel num_threads(a)
foo();
#ifdef OMP60
-#pragma omp parallel num_threads(strict: 2)
+ char str[] = "msg";
+ const char *str1 = "msg1";
+#pragma omp parallel num_threads(strict: 2) severity(fatal) message(str)
foo();
-#pragma omp parallel num_threads(strict: a)
+#pragma omp parallel num_threads(strict: a) severity(warning) message(str1)
foo();
#endif
return a + tmain<char, 5>() + tmain<S, 1>();
@@ -79,6 +83,13 @@ int main() {
// CHECK: [[RES:%.+]] = sext i8 [[A_VAL]] to i32
// CHECK: call {{.*}}void @__kmpc_push_num_threads(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]])
// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
+// OMP60: [[ARRDECAY:%.+]] = getelementptr inbounds [4 x i8], ptr [[STR:%.+]], i64 0, i64 0
+// OMP60: call void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 2, i32 2, ptr [[ARRDECAY]])
+// OMP60: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(
+// OMP60: [[A_VAL1:%.+]] = load i8, ptr [[A_ADDR]]
+// OMP60: [[RES1:%.+]] = sext i8 [[A_VAL1]] to i32
+// OMP60: call void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES1]], i32 1, ptr [[STR2:%.+]])
+// OMP60: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(
// CHECK: invoke{{.*}} [[INT_TY:i[0-9]+]] [[TMAIN_CHAR_5:@.+]]()
// CHECK: invoke{{.*}} [[INT_TY]] [[TMAIN_S_1:@.+]]()
// CHECK: call {{.*}} [[S_TY_DESTR:@.+]](ptr {{[^,]*}} [[S_ADDR]])
@@ -91,9 +102,10 @@ int main() {
// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
// CHECK: call {{.*}}void @__kmpc_push_num_threads(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 23)
// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
-// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 5, i32 2, ptr null)
+// OMP60: [[ARRDECAY:%.+]] = getelementptr inbounds [4 x i8], ptr [[STR:%.+]], i64 0, i64 0
+// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 5, i32 2, ptr [[ARRDECAY]])
// OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call(
-// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 23, i32 2, ptr null)
+// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 23, i32 1, ptr [[STR1:%.+]])
// OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call(
// CHECK: ret [[INT_TY]] 0
// CHECK-NEXT: }
@@ -108,12 +120,13 @@ int main() {
// CHECK: call {{.*}}void @__kmpc_push_num_threads(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES]])
// CHECK: {{(invoke|call)}} {{.*}} [[S_TY_DESTR]](ptr {{[^,]*}} [[S_TEMP]])
// CHECK: call {{.*}}void {{.*}} @__kmpc_fork_call(
-// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 1, i32 2, ptr null)
+// OMP60: [[ARRDECAY:%.+]] = getelementptr inbounds [4 x i8], ptr [[STR:%.+]], i64 0, i64 0
+// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 1, i32 2, ptr [[ARRDECAY]])
// OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call(
// OMP60: {{(invoke|call)}} {{.*}} [[S_TY_CONSTR]](ptr {{[^,]*}} [[S_TEMP:%.+]], [[INTPTR_T_TY]] noundef [[INTPTR_T_TY_ATTR]]23)
// OMP60: [[S_CHAR_OP1:%.+]] = invoke{{.*}} i8 [[S_TY_CHAR_OP]](ptr {{[^,]*}} [[S_TEMP]])
// OMP60: [[RES1:%.+]] = sext {{.*}}i8 [[S_CHAR_OP1]] to i32
-// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES1]], i32 2, ptr null)
+// OMP60: call {{.*}}void @__kmpc_push_num_threads_strict(ptr [[DEF_LOC_2]], i32 [[GTID]], i32 [[RES1]], i32 1, ptr [[STR1:%.+]])
// OMP60: {{(invoke|call)}} {{.*}} [[S_TY_DESTR]](ptr {{[^,]*}} [[S_TEMP]])
// OMP60: call {{.*}}void {{.*}} @__kmpc_fork_call(
// CHECK: ret [[INT_TY]] 0
>From ee91aeb6d22b744789612c863add25b0ee337e40 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Thu, 31 Jul 2025 10:12:08 -0500
Subject: [PATCH 5/6] implement feedback
---
clang/include/clang/AST/OpenMPClause.h | 21 +++++++------------
clang/lib/AST/OpenMPClause.cpp | 2 +-
clang/lib/Sema/SemaOpenMP.cpp | 29 ++++++++++----------------
3 files changed, 20 insertions(+), 32 deletions(-)
diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index b5dabf283964e..cad1b70892838 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -1874,10 +1874,6 @@ class OMPMessageClause final : public OMPClause {
// Expression of the 'message' clause.
Stmt *MessageString = nullptr;
- // The message as a StringLiteral in case it is as string literal. This might
- // be needed during compile time.
- StringLiteral *MessageStringLiteral = nullptr;
-
/// Set message string of the clause.
void setMessageString(Expr *MS) { MessageString = MS; }
@@ -1891,14 +1887,10 @@ class OMPMessageClause final : public OMPClause {
/// \param StartLoc Starting location of the clause.
/// \param LParenLoc Location of '('.
/// \param EndLoc Ending location of the clause.
- /// \param MessageStringLiteral The message as a StringLiteral in case it is
- /// as string literal. This might be needed during compile time.
OMPMessageClause(Expr *MS, SourceLocation StartLoc, SourceLocation LParenLoc,
- SourceLocation EndLoc,
- StringLiteral *MessageStringLiteral = nullptr)
+ SourceLocation EndLoc)
: OMPClause(llvm::omp::OMPC_message, StartLoc, EndLoc),
- LParenLoc(LParenLoc), MessageString(MS),
- MessageStringLiteral(MessageStringLiteral) {}
+ LParenLoc(LParenLoc), MessageString(MS) {}
/// Build an empty clause.
OMPMessageClause()
@@ -1911,9 +1903,12 @@ class OMPMessageClause final : public OMPClause {
/// Returns message string of the clause.
Expr *getMessageString() const { return cast_or_null<Expr>(MessageString); }
- // Returns the source message as a string literal in case it has been a string
- // literal. Otherwise, return null.
- StringLiteral *getAsStringLiteral() const { return MessageStringLiteral; }
+ /// Try to evaluate the message string at compile time.
+ std::optional<std::string> tryEvaluateString(ASTContext &Ctx) const {
+ if (Expr *MessageExpr = getMessageString())
+ return MessageExpr->tryEvaluateString(Ctx);
+ return std::nullopt;
+ }
child_range children() {
return child_range(&MessageString, &MessageString + 1);
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index b561156cd2d20..bc4480bf30d37 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -1964,7 +1964,7 @@ void OMPClausePrinter::VisitOMPSeverityClause(OMPSeverityClause *Node) {
void OMPClausePrinter::VisitOMPMessageClause(OMPMessageClause *Node) {
OS << "message(";
- if (StringLiteral *SL = Node->getAsStringLiteral())
+ if (StringLiteral *SL = dyn_cast<StringLiteral>(Node->getMessageString()))
OS << "\"" << SL->getString() << "\"";
else if (Expr *E = Node->getMessageString())
E->printPretty(OS, nullptr, Policy);
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 70e43379b64d2..157a6eb3162dc 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -11078,26 +11078,27 @@ StmtResult SemaOpenMP::ActOnOpenMPErrorDirective(ArrayRef<OMPClause *> Clauses,
return StmtError();
}
- const OMPSeverityClause *SeverityC =
- OMPExecutableDirective::getSingleClause<OMPSeverityClause>(Clauses);
- const OMPMessageClause *MessageC =
- OMPExecutableDirective::getSingleClause<OMPMessageClause>(Clauses);
-
if (!AtC || AtC->getAtKind() == OMPC_AT_compilation) {
- StringLiteral *SL = MessageC ? MessageC->getAsStringLiteral() : nullptr;
+ const OMPSeverityClause *SeverityC =
+ OMPExecutableDirective::getSingleClause<OMPSeverityClause>(Clauses);
+ const OMPMessageClause *MessageC =
+ OMPExecutableDirective::getSingleClause<OMPMessageClause>(Clauses);
+ std::optional<std::string> SL =
+ MessageC ? MessageC->tryEvaluateString(getASTContext()) : std::nullopt;
+
if (MessageC && !SL)
Diag(MessageC->getMessageString()->getBeginLoc(),
diag::warn_clause_expected_string_literal)
<< getOpenMPClauseNameForDiag(OMPC_message);
if (SeverityC && SeverityC->getSeverityKind() == OMPC_SEVERITY_warning)
Diag(SeverityC->getSeverityKindKwLoc(), diag::warn_diagnose_if_succeeded)
- << (SL ? SL->getString() : "WARNING");
+ << SL.value_or("WARNING");
else
- Diag(StartLoc, diag::err_diagnose_if_succeeded)
- << (SL ? SL->getString() : "ERROR");
+ Diag(StartLoc, diag::err_diagnose_if_succeeded) << SL.value_or("ERROR");
if (!SeverityC || SeverityC->getSeverityKind() != OMPC_SEVERITY_warning)
return StmtError();
}
+
return OMPErrorDirective::Create(getASTContext(), StartLoc, EndLoc, Clauses);
}
@@ -16457,20 +16458,12 @@ OMPClause *SemaOpenMP::ActOnOpenMPMessageClause(Expr *ME,
return nullptr;
}
- // If the string is a string literal, save it in case it is later needed
- // during compile time. Also consider a const char pointer to a string
- // literal. This is necessary for template instantiations where the string
- // literal is not passed directly and we only get a const char pointer to it.
- StringLiteral *SL = dyn_cast<StringLiteral>(
- isa<ImplicitCastExpr>(ME) ? cast<ImplicitCastExpr>(ME)->getSubExpr()
- : ME);
-
// Convert array type to pointer type if needed.
if (Type->isArrayType())
ME = SemaRef.DefaultFunctionArrayConversion(ME).get();
return new (getASTContext())
- OMPMessageClause(ME, StartLoc, LParenLoc, EndLoc, SL);
+ OMPMessageClause(ME, StartLoc, LParenLoc, EndLoc);
}
OMPClause *SemaOpenMP::ActOnOpenMPOrderClause(
>From 9ec7fea9108752fc04b2b8b2fcd8317cc6f05588 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Fri, 1 Aug 2025 09:07:16 -0500
Subject: [PATCH 6/6] adapt OMPClausePrinter::VisitOMPMessageClause; add lvalue conversion to message clause
---
clang/lib/AST/OpenMPClause.cpp | 4 +---
clang/lib/Sema/SemaOpenMP.cpp | 3 +--
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index bc4480bf30d37..a9aa17a19d1ea 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -1964,9 +1964,7 @@ void OMPClausePrinter::VisitOMPSeverityClause(OMPSeverityClause *Node) {
void OMPClausePrinter::VisitOMPMessageClause(OMPMessageClause *Node) {
OS << "message(";
- if (StringLiteral *SL = dyn_cast<StringLiteral>(Node->getMessageString()))
- OS << "\"" << SL->getString() << "\"";
- else if (Expr *E = Node->getMessageString())
+ if (Expr *E = Node->getMessageString())
E->printPretty(OS, nullptr, Policy);
OS << ")";
}
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 157a6eb3162dc..229190a641080 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -16459,8 +16459,7 @@ OMPClause *SemaOpenMP::ActOnOpenMPMessageClause(Expr *ME,
}
// Convert array type to pointer type if needed.
- if (Type->isArrayType())
- ME = SemaRef.DefaultFunctionArrayConversion(ME).get();
+ ME = SemaRef.DefaultFunctionArrayLvalueConversion(ME).get();
return new (getASTContext())
OMPMessageClause(ME, StartLoc, LParenLoc, EndLoc);
More information about the llvm-branch-commits mailing list