[clang] [llvm] [NVPTX] Change the alloca address space in NVPTXLowerAlloca (PR #154814)
Theodoros Theodoridis via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 2 10:10:55 PDT 2025
https://github.com/thetheodor updated https://github.com/llvm/llvm-project/pull/154814
>From 095073e292e212b938ae3e72c403faab09940a9c Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Tue, 12 Aug 2025 15:22:24 +0000
Subject: [PATCH 01/14] [NVPTX] Change the alloca address space in
NVPTXLowerAlloca
This patch refactors NVPTXLowerAlloca to produce simpler IR for allocas.
Previously, the implementation attached a pair of consecutive address
space casts to each alloca: one from addrspace(0) (generic) to
addrspace(5) (local), and another immediately back to addrspace(0).
Downstream passes needed to recognize this idiom to generate efficient
PTX. With this patch, NVPTXLowerAlloca directly changes the address
space of each alloca to "local" and inserts a single addrspacecast from
local back to generic. The InferAddressSpace pass can then remove the
remaining cast. This change results in fewer address-space-change (ctva)
instructions in the final PTX.
---
llvm/include/llvm/Target/TargetMachine.h | 2 +-
llvm/lib/IR/Verifier.cpp | 8 +
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 3 +-
llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp | 4 +-
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 5 +-
llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp | 124 ++++------
llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp | 3 +-
llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 10 +-
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 24 ++
llvm/lib/Target/NVPTX/NVPTXTargetMachine.h | 3 +
.../CodeGen/NVPTX/call-with-alloca-buffer.ll | 5 +-
.../NVPTX/dynamic-stackalloc-regression.ll | 14 +-
llvm/test/CodeGen/NVPTX/f32x2-instructions.ll | 7 +-
llvm/test/CodeGen/NVPTX/indirect_byval.ll | 22 +-
llvm/test/CodeGen/NVPTX/local-stack-frame.ll | 67 +++--
llvm/test/CodeGen/NVPTX/lower-alloca.ll | 22 +-
.../CodeGen/NVPTX/lower-args-gridconstant.ll | 9 +-
llvm/test/CodeGen/NVPTX/lower-byval-args.ll | 231 ++++++++----------
llvm/test/CodeGen/NVPTX/vaargs.ll | 22 +-
llvm/test/CodeGen/NVPTX/variadics-backend.ll | 109 ++++-----
.../DebugInfo/NVPTX/dbg-declare-alloca.ll | 3 +-
.../DebugInfo/NVPTX/dbg-value-const-byref.ll | 2 +-
22 files changed, 342 insertions(+), 357 deletions(-)
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index bf4e490554723..f4cd06bf93d40 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -208,7 +208,7 @@ class LLVM_ABI TargetMachine {
/// The LLVM Module owns a DataLayout that is used for the target independent
/// optimizations and code generation. This hook provides a target specific
/// check on the validity of this DataLayout.
- bool isCompatibleDataLayout(const DataLayout &Candidate) const {
+ virtual bool isCompatibleDataLayout(const DataLayout &Candidate) const {
return DL == Candidate;
}
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 9d9b51db98702..2afa2bda53b52 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -114,6 +114,7 @@
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
+#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -4498,6 +4499,13 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
"alloca on amdgpu must be in addrspace(5)", &AI);
}
+ if (TT.isNVPTX()) {
+ Check(AI.getAddressSpace() == NVPTXAS::ADDRESS_SPACE_LOCAL ||
+ AI.getAddressSpace() == NVPTXAS::ADDRESS_SPACE_GENERIC,
+ "AllocaInst can only be in Generic or Local address space for NVPTX.",
+ &AI);
+ }
+
visitInstruction(AI);
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 7391c2d488b57..8528cb41d9244 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1484,7 +1484,8 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
if (NumBytes) {
O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
<< DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
- if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
+ if (static_cast<const NVPTXTargetMachine &>(MF.getTarget())
+ .getPointerSize(ADDRESS_SPACE_LOCAL) == 8) {
O << "\t.reg .b64 \t%SP;\n"
<< "\t.reg .b64 \t%SPL;\n";
} else {
diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 47bc15f52bb96..78f18a93b869b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -48,8 +48,8 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
// mov %SPL, %depot;
// cvta.local %SP, %SPL;
// for local address accesses in MF.
- bool Is64Bit =
- static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit();
+ bool Is64Bit = static_cast<const NVPTXTargetMachine &>(MF.getTarget())
+ .getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8;
unsigned CvtaLocalOpcode =
(Is64Bit ? NVPTX::cvta_local_64 : NVPTX::cvta_local);
unsigned MovDepotOpcode =
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index bb4bb1195f78b..f35407ed99106 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1803,10 +1803,7 @@ SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
{Chain, DAG.getZExtOrTrunc(Size, DL, LocalVT),
DAG.getTargetConstant(Align, DL, MVT::i32)});
- SDValue ASC = DAG.getAddrSpaceCast(
- DL, Op.getValueType(), Alloc, ADDRESS_SPACE_LOCAL, ADDRESS_SPACE_GENERIC);
-
- return DAG.getMergeValues({ASC, SDValue(Alloc.getNode(), 1)}, DL);
+ return Alloc;
}
SDValue NVPTXTargetLowering::LowerSTACKRESTORE(SDValue Op,
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 88bc000f39bf7..03412edb9e23c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -6,16 +6,16 @@
//
//===----------------------------------------------------------------------===//
//
-// For all alloca instructions, and add a pair of cast to local address for
-// each of them. For example,
+// Change the Module's DataLayout to have the local address space for alloca's.
+// Change the address space of each alloca to local and add an addrspacecast to
+// generic address space. For example,
//
// %A = alloca i32
// store i32 0, i32* %A ; emits st.u32
//
// will be transformed to
//
-// %A = alloca i32
-// %Local = addrspacecast i32* %A to i32 addrspace(5)*
+// %A = alloca i32, addrspace(5)
// %Generic = addrspacecast i32 addrspace(5)* %A to i32*
// store i32 0, i32 addrspace(5)* %Generic ; emits st.local.u32
//
@@ -24,18 +24,24 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "llvm/Support/NVPTXAddrSpace.h"
#include "NVPTX.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
using namespace llvm;
+using namespace NVPTXAS;
namespace {
class NVPTXLowerAlloca : public FunctionPass {
bool runOnFunction(Function &F) override;
+ bool doInitialization(Module &M) override;
public:
static char ID; // Pass identification, replacement for typeid
@@ -58,77 +64,55 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- bool Changed = false;
+ SmallVector<AllocaInst *, 16> Allocas;
for (auto &BB : F)
- for (auto &I : BB) {
- if (auto allocaInst = dyn_cast<AllocaInst>(&I)) {
- Changed = true;
+ for (auto &I : BB)
+ if (auto *Alloca = dyn_cast<AllocaInst>(&I))
+ if (Alloca->getAddressSpace() != ADDRESS_SPACE_LOCAL)
+ Allocas.push_back(Alloca);
+
+ if (Allocas.empty())
+ return false;
- PointerType *AllocInstPtrTy =
- cast<PointerType>(allocaInst->getType()->getScalarType());
- unsigned AllocAddrSpace = AllocInstPtrTy->getAddressSpace();
- assert((AllocAddrSpace == ADDRESS_SPACE_GENERIC ||
- AllocAddrSpace == ADDRESS_SPACE_LOCAL) &&
- "AllocaInst can only be in Generic or Local address space for "
- "NVPTX.");
+ for (AllocaInst *Alloca : Allocas) {
+ auto *NewAlloca = new AllocaInst(
+ Alloca->getAllocatedType(), ADDRESS_SPACE_LOCAL, Alloca->getArraySize(),
+ Alloca->getAlign(), Alloca->getName());
+ auto *Cast = new AddrSpaceCastInst(
+ NewAlloca,
+ PointerType::get(Alloca->getAllocatedType()->getContext(),
+ ADDRESS_SPACE_GENERIC),
+ "");
+ Cast->insertBefore(Alloca->getIterator());
+ NewAlloca->insertBefore(Cast->getIterator());
+ for (auto &U : llvm::make_early_inc_range(Alloca->uses())) {
+ auto *II = dyn_cast<IntrinsicInst>(U.getUser());
+ if (!II || (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end))
+ continue;
- Instruction *AllocaInLocalAS = allocaInst;
- auto ETy = allocaInst->getAllocatedType();
+ IRBuilder<> Builder(II);
+ Builder.CreateIntrinsic(II->getIntrinsicID(), {NewAlloca->getType()},
+ {NewAlloca});
+ II->eraseFromParent();
+ }
- // We need to make sure that LLVM has info that alloca needs to go to
- // ADDRESS_SPACE_LOCAL for InferAddressSpace pass.
- //
- // For allocas in ADDRESS_SPACE_LOCAL, we add addrspacecast to
- // ADDRESS_SPACE_LOCAL and back to ADDRESS_SPACE_GENERIC, so that
- // the alloca's users still use a generic pointer to operate on.
- //
- // For allocas already in ADDRESS_SPACE_LOCAL, we just need
- // addrspacecast to ADDRESS_SPACE_GENERIC.
- if (AllocAddrSpace == ADDRESS_SPACE_GENERIC) {
- auto ASCastToLocalAS = new AddrSpaceCastInst(
- allocaInst,
- PointerType::get(ETy->getContext(), ADDRESS_SPACE_LOCAL), "");
- ASCastToLocalAS->insertAfter(allocaInst->getIterator());
- AllocaInLocalAS = ASCastToLocalAS;
- }
+ Alloca->replaceAllUsesWith(Cast);
+ Alloca->eraseFromParent();
+ }
+ return true;
+}
- auto AllocaInGenericAS = new AddrSpaceCastInst(
- AllocaInLocalAS,
- PointerType::get(ETy->getContext(), ADDRESS_SPACE_GENERIC), "");
- AllocaInGenericAS->insertAfter(AllocaInLocalAS->getIterator());
+bool NVPTXLowerAlloca::doInitialization(Module &M) {
+ const auto &DL = M.getDataLayout();
+ if (DL.getAllocaAddrSpace() == ADDRESS_SPACE_LOCAL)
+ return false;
+ auto DLStr = DL.getStringRepresentation();
- for (Use &AllocaUse : llvm::make_early_inc_range(allocaInst->uses())) {
- // Check Load, Store, GEP, and BitCast Uses on alloca and make them
- // use the converted generic address, in order to expose non-generic
- // addrspacecast to NVPTXInferAddressSpaces. For other types
- // of instructions this is unnecessary and may introduce redundant
- // address cast.
- auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
- if (LI && LI->getPointerOperand() == allocaInst &&
- !LI->isVolatile()) {
- LI->setOperand(LI->getPointerOperandIndex(), AllocaInGenericAS);
- continue;
- }
- auto SI = dyn_cast<StoreInst>(AllocaUse.getUser());
- if (SI && SI->getPointerOperand() == allocaInst &&
- !SI->isVolatile()) {
- SI->setOperand(SI->getPointerOperandIndex(), AllocaInGenericAS);
- continue;
- }
- auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser());
- if (GI && GI->getPointerOperand() == allocaInst) {
- GI->setOperand(GI->getPointerOperandIndex(), AllocaInGenericAS);
- continue;
- }
- auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser());
- if (BI && BI->getOperand(0) == allocaInst) {
- BI->setOperand(0, AllocaInGenericAS);
- continue;
- }
- }
- }
- }
- return Changed;
+ auto AddrSpaceStr = "A" + std::to_string(ADDRESS_SPACE_LOCAL);
+ assert(!StringRef(DLStr).contains("A") && "DataLayout should not contain A");
+ M.setDataLayout(DLStr.empty() ? AddrSpaceStr : DLStr + "-" + AddrSpaceStr);
+ return true;
}
FunctionPass *llvm::createNVPTXLowerAllocaPass() {
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index e2bbe57c0085c..2b18ca9dd774a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -529,7 +529,8 @@ void copyByValParam(Function &F, Argument &Arg) {
// the use of the byval parameter with this alloca instruction.
AllocA->setAlignment(
Arg.getParamAlign().value_or(DL.getPrefTypeAlign(StructType)));
- Arg.replaceAllUsesWith(AllocA);
+ auto *AddressSpaceCast = IRB.CreateAddrSpaceCast(AllocA, Arg.getType(), Arg.getName());
+ Arg.replaceAllUsesWith(AddressSpaceCast);
CallInst *ArgInParam = createNVVMInternalAddrspaceWrap(IRB, Arg);
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 646b554878c70..0c56caeadcffe 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -119,7 +119,7 @@ bool NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum + 1).getImm();
// Using I0 as the frame pointer
- MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);
+ MI.getOperand(FIOperandNum).ChangeToRegister(getFrameLocalRegister(MF), false);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
return false;
}
@@ -127,14 +127,18 @@ bool NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Register NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const NVPTXTargetMachine &TM =
static_cast<const NVPTXTargetMachine &>(MF.getTarget());
- return TM.is64Bit() ? NVPTX::VRFrame64 : NVPTX::VRFrame32;
+ return TM.getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8
+ ? NVPTX::VRFrame64
+ : NVPTX::VRFrame32;
}
Register
NVPTXRegisterInfo::getFrameLocalRegister(const MachineFunction &MF) const {
const NVPTXTargetMachine &TM =
static_cast<const NVPTXTargetMachine &>(MF.getTarget());
- return TM.is64Bit() ? NVPTX::VRFrameLocal64 : NVPTX::VRFrameLocal32;
+ return TM.getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8
+ ? NVPTX::VRFrameLocal64
+ : NVPTX::VRFrameLocal32;
}
void NVPTXRegisterInfo::clearDebugRegisterMap() const {
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 0603994606d71..209dd51a44a61 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -371,6 +371,8 @@ void NVPTXPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOptLevel::None) {
addAddressSpaceInferencePasses();
addStraightLineScalarOptimizationPasses();
+ } else {
+ addPass(createNVPTXLowerAllocaPass());
}
addPass(createAtomicExpandLegacyPass());
@@ -502,3 +504,25 @@ void NVPTXPassConfig::addMachineSSAOptimization() {
addPass(&PeepholeOptimizerLegacyID);
printAndVerify("After codegen peephole optimization pass");
}
+
+bool NVPTXTargetMachine::isCompatibleDataLayout(
+ const DataLayout &Candidate) const {
+ //XXX: Should we enforce that the Candidate DataLayout has the same address space for allocas?
+ if (DL == Candidate)
+ return true;
+
+ auto DLStr = DL.getStringRepresentation();
+ if (!StringRef(DLStr).contains("A"))
+ DLStr = DLStr.empty() ? "A" + std::to_string(ADDRESS_SPACE_LOCAL)
+ : DLStr + "-A" + std::to_string(ADDRESS_SPACE_LOCAL);
+ auto NewDL = DataLayout(DLStr);
+
+ return NewDL == Candidate;
+}
+
+unsigned NVPTXTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
+ if (Kind == PseudoSourceValue::FixedStack) {
+ return ADDRESS_SPACE_LOCAL;
+ }
+ return CodeGenTargetMachineImpl::getAddressSpaceForPseudoSourceKind(Kind);
+}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index 118a01a0352f5..c2f09a89865c4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -76,6 +76,9 @@ class NVPTXTargetMachine : public CodeGenTargetMachineImpl {
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const override;
+
+ bool isCompatibleDataLayout(const DataLayout &Candidate) const override;
+ unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const override;
}; // NVPTXTargetMachine.
class NVPTXTargetMachine32 : public NVPTXTargetMachine {
diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index 0eb7f6462f6fa..e825f8ef19949 100644
--- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -25,9 +25,10 @@ entry:
; CHECK: ld.param.b64 %rd[[A_REG:[0-9]+]], [kernel_func_param_0]
; CHECK: cvta.to.global.u64 %rd[[A1_REG:[0-9]+]], %rd[[A_REG]]
-; CHECK: add.u64 %rd[[SP_REG:[0-9]+]], %SP, 0
+; CHECK: add.u64 %rd[[SP_REG0:[0-9]+]], %SPL, 0
+; CHECK: cvta.local.u64 %rd[[SP_REG:[0-9]+]], %rd[[SP_REG0]];
; CHECK: ld.global.b32 %r[[A0_REG:[0-9]+]], [%rd[[A1_REG]]]
-; CHECK: st.local.b32 [{{%rd[0-9]+}}], %r[[A0_REG]]
+; CHECK: st.local.b32 [%SPL], %r[[A0_REG]]
%0 = load float, ptr %a, align 4
store float %0, ptr %buf, align 4
diff --git a/llvm/test/CodeGen/NVPTX/dynamic-stackalloc-regression.ll b/llvm/test/CodeGen/NVPTX/dynamic-stackalloc-regression.ll
index 0474d82556c1e..34702f1c177c5 100644
--- a/llvm/test/CodeGen/NVPTX/dynamic-stackalloc-regression.ll
+++ b/llvm/test/CodeGen/NVPTX/dynamic-stackalloc-regression.ll
@@ -13,13 +13,13 @@ define void @foo(i64 %a, ptr %p0, ptr %p1) {
; CHECK-NEXT: add.s64 %rd2, %rd1, 7;
; CHECK-NEXT: and.b64 %rd3, %rd2, -8;
; CHECK-NEXT: alloca.u64 %rd4, %rd3, 16;
-; CHECK-NEXT: cvta.local.u64 %rd5, %rd4;
-; CHECK-NEXT: ld.param.b64 %rd6, [foo_param_1];
-; CHECK-NEXT: alloca.u64 %rd7, %rd3, 16;
-; CHECK-NEXT: cvta.local.u64 %rd8, %rd7;
-; CHECK-NEXT: ld.param.b64 %rd9, [foo_param_2];
-; CHECK-NEXT: st.b64 [%rd6], %rd5;
-; CHECK-NEXT: st.b64 [%rd9], %rd8;
+; CHECK-NEXT: ld.param.b64 %rd5, [foo_param_1];
+; CHECK-NEXT: cvta.local.u64 %rd6, %rd4;
+; CHECK-NEXT: ld.param.b64 %rd7, [foo_param_2];
+; CHECK-NEXT: alloca.u64 %rd8, %rd3, 16;
+; CHECK-NEXT: cvta.local.u64 %rd9, %rd8;
+; CHECK-NEXT: st.b64 [%rd5], %rd6;
+; CHECK-NEXT: st.b64 [%rd7], %rd9;
; CHECK-NEXT: ret;
%b = alloca i8, i64 %a, align 16
%c = alloca i8, i64 %a, align 16
diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
index 7ca16f702d8f3..cf00c2c9eaa3f 100644
--- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
@@ -86,15 +86,14 @@ define float @test_extract_i(<2 x float> %a, i64 %idx) #0 {
; CHECK-NOF32X2-EMPTY:
; CHECK-NOF32X2-NEXT: // %bb.0:
; CHECK-NOF32X2-NEXT: mov.b64 %SPL, __local_depot3;
-; CHECK-NOF32X2-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_extract_i_param_0];
; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1];
-; CHECK-NOF32X2-NEXT: st.v2.b32 [%SP], {%r1, %r2};
+; CHECK-NOF32X2-NEXT: st.local.v2.b32 [%SPL], {%r1, %r2};
; CHECK-NOF32X2-NEXT: and.b64 %rd2, %rd1, 1;
; CHECK-NOF32X2-NEXT: shl.b64 %rd3, %rd2, 2;
-; CHECK-NOF32X2-NEXT: add.u64 %rd4, %SP, 0;
+; CHECK-NOF32X2-NEXT: add.u64 %rd4, %SPL, 0;
; CHECK-NOF32X2-NEXT: or.b64 %rd5, %rd4, %rd3;
-; CHECK-NOF32X2-NEXT: ld.b32 %r3, [%rd5];
+; CHECK-NOF32X2-NEXT: ld.local.b32 %r3, [%rd5];
; CHECK-NOF32X2-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NOF32X2-NEXT: ret;
;
diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
index 673fb73948268..39813efef9b64 100644
--- a/llvm/test/CodeGen/NVPTX/indirect_byval.ll
+++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
@@ -21,19 +21,18 @@ define internal i32 @foo() {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: mov.b64 %SPL, __local_depot0;
-; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
-; CHECK-NEXT: ld.global.b64 %rd1, [ptr];
+; CHECK-NEXT: add.u64 %rd1, %SPL, 0;
+; CHECK-NEXT: cvta.local.u64 %rd2, %rd1;
+; CHECK-NEXT: ld.global.b64 %rd3, [ptr];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 1 .b8 param0[1];
; CHECK-NEXT: .param .b64 param1;
; CHECK-NEXT: .param .b32 retval0;
-; CHECK-NEXT: add.u64 %rd2, %SP, 0;
; CHECK-NEXT: st.param.b64 [param1], %rd2;
-; CHECK-NEXT: add.u64 %rd3, %SPL, 1;
-; CHECK-NEXT: ld.local.b8 %rs1, [%rd3];
+; CHECK-NEXT: ld.local.b8 %rs1, [%SPL+1];
; CHECK-NEXT: st.param.b8 [param0], %rs1;
; CHECK-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _);
-; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_0;
+; CHECK-NEXT: call (retval0), %rd3, (param0, param1), prototype_0;
; CHECK-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
@@ -58,19 +57,18 @@ define internal i32 @bar() {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: mov.b64 %SPL, __local_depot1;
-; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
-; CHECK-NEXT: ld.global.b64 %rd1, [ptr];
+; CHECK-NEXT: add.u64 %rd1, %SPL, 0;
+; CHECK-NEXT: cvta.local.u64 %rd2, %rd1;
+; CHECK-NEXT: ld.global.b64 %rd3, [ptr];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: .param .b64 param1;
; CHECK-NEXT: .param .b32 retval0;
-; CHECK-NEXT: add.u64 %rd2, %SP, 0;
; CHECK-NEXT: st.param.b64 [param1], %rd2;
-; CHECK-NEXT: add.u64 %rd3, %SPL, 8;
-; CHECK-NEXT: ld.local.b64 %rd4, [%rd3];
+; CHECK-NEXT: ld.local.b64 %rd4, [%SPL+8];
; CHECK-NEXT: st.param.b64 [param0], %rd4;
; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _);
-; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_1;
+; CHECK-NEXT: call (retval0), %rd3, (param0, param1), prototype_1;
; CHECK-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
index f7137e05a5e4f..3e6f5e37aa85f 100644
--- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -12,13 +12,12 @@ define void @foo(i32 %a) {
; PTX32-NEXT: .local .align 4 .b8 __local_depot0[4];
; PTX32-NEXT: .reg .b32 %SP;
; PTX32-NEXT: .reg .b32 %SPL;
-; PTX32-NEXT: .reg .b32 %r<3>;
+; PTX32-NEXT: .reg .b32 %r<2>;
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
; PTX32-NEXT: mov.b32 %SPL, __local_depot0;
; PTX32-NEXT: ld.param.b32 %r1, [foo_param_0];
-; PTX32-NEXT: add.u32 %r2, %SPL, 0;
-; PTX32-NEXT: st.local.b32 [%r2], %r1;
+; PTX32-NEXT: st.local.b32 [%SPL], %r1;
; PTX32-NEXT: ret;
;
; PTX64-LABEL: foo(
@@ -27,13 +26,11 @@ define void @foo(i32 %a) {
; PTX64-NEXT: .reg .b64 %SP;
; PTX64-NEXT: .reg .b64 %SPL;
; PTX64-NEXT: .reg .b32 %r<2>;
-; PTX64-NEXT: .reg .b64 %rd<2>;
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
; PTX64-NEXT: mov.b64 %SPL, __local_depot0;
; PTX64-NEXT: ld.param.b32 %r1, [foo_param_0];
-; PTX64-NEXT: add.u64 %rd1, %SPL, 0;
-; PTX64-NEXT: st.local.b32 [%rd1], %r1;
+; PTX64-NEXT: st.local.b32 [%SPL], %r1;
; PTX64-NEXT: ret;
%local = alloca i32, align 4
store volatile i32 %a, ptr %local
@@ -50,14 +47,13 @@ define ptx_kernel void @foo2(i32 %a) {
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
; PTX32-NEXT: mov.b32 %SPL, __local_depot1;
-; PTX32-NEXT: cvta.local.u32 %SP, %SPL;
; PTX32-NEXT: ld.param.b32 %r1, [foo2_param_0];
-; PTX32-NEXT: add.u32 %r2, %SP, 0;
-; PTX32-NEXT: add.u32 %r3, %SPL, 0;
-; PTX32-NEXT: st.local.b32 [%r3], %r1;
+; PTX32-NEXT: add.u32 %r2, %SPL, 0;
+; PTX32-NEXT: cvta.local.u32 %r3, %r2;
+; PTX32-NEXT: st.local.b32 [%SPL], %r1;
; PTX32-NEXT: { // callseq 0, 0
; PTX32-NEXT: .param .b32 param0;
-; PTX32-NEXT: st.param.b32 [param0], %r2;
+; PTX32-NEXT: st.param.b32 [param0], %r3;
; PTX32-NEXT: call.uni bar, (param0);
; PTX32-NEXT: } // callseq 0
; PTX32-NEXT: ret;
@@ -72,14 +68,13 @@ define ptx_kernel void @foo2(i32 %a) {
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
; PTX64-NEXT: mov.b64 %SPL, __local_depot1;
-; PTX64-NEXT: cvta.local.u64 %SP, %SPL;
; PTX64-NEXT: ld.param.b32 %r1, [foo2_param_0];
-; PTX64-NEXT: add.u64 %rd1, %SP, 0;
-; PTX64-NEXT: add.u64 %rd2, %SPL, 0;
-; PTX64-NEXT: st.local.b32 [%rd2], %r1;
+; PTX64-NEXT: add.u64 %rd1, %SPL, 0;
+; PTX64-NEXT: cvta.local.u64 %rd2, %rd1;
+; PTX64-NEXT: st.local.b32 [%SPL], %r1;
; PTX64-NEXT: { // callseq 0, 0
; PTX64-NEXT: .param .b64 param0;
-; PTX64-NEXT: st.param.b64 [param0], %rd1;
+; PTX64-NEXT: st.param.b64 [param0], %rd2;
; PTX64-NEXT: call.uni bar, (param0);
; PTX64-NEXT: } // callseq 0
; PTX64-NEXT: ret;
@@ -102,9 +97,9 @@ define void @foo3(i32 %a) {
; PTX32-NEXT: // %bb.0:
; PTX32-NEXT: mov.b32 %SPL, __local_depot2;
; PTX32-NEXT: ld.param.b32 %r1, [foo3_param_0];
-; PTX32-NEXT: add.u32 %r2, %SPL, 0;
-; PTX32-NEXT: shl.b32 %r3, %r1, 2;
-; PTX32-NEXT: add.s32 %r4, %r2, %r3;
+; PTX32-NEXT: shl.b32 %r2, %r1, 2;
+; PTX32-NEXT: add.u32 %r3, %SPL, 0;
+; PTX32-NEXT: add.s32 %r4, %r3, %r2;
; PTX32-NEXT: st.local.b32 [%r4], %r1;
; PTX32-NEXT: ret;
;
@@ -139,21 +134,20 @@ define void @foo4() {
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
; PTX32-NEXT: mov.b32 %SPL, __local_depot3;
-; PTX32-NEXT: cvta.local.u32 %SP, %SPL;
-; PTX32-NEXT: add.u32 %r1, %SP, 0;
-; PTX32-NEXT: add.u32 %r2, %SPL, 0;
-; PTX32-NEXT: add.u32 %r3, %SP, 4;
-; PTX32-NEXT: add.u32 %r4, %SPL, 4;
-; PTX32-NEXT: st.local.b32 [%r2], 0;
-; PTX32-NEXT: st.local.b32 [%r4], 0;
+; PTX32-NEXT: add.u32 %r1, %SPL, 0;
+; PTX32-NEXT: cvta.local.u32 %r2, %r1;
+; PTX32-NEXT: add.u32 %r3, %SPL, 4;
+; PTX32-NEXT: cvta.local.u32 %r4, %r3;
+; PTX32-NEXT: st.local.b32 [%SPL], 0;
+; PTX32-NEXT: st.local.b32 [%SPL+4], 0;
; PTX32-NEXT: { // callseq 1, 0
; PTX32-NEXT: .param .b32 param0;
-; PTX32-NEXT: st.param.b32 [param0], %r1;
+; PTX32-NEXT: st.param.b32 [param0], %r2;
; PTX32-NEXT: call.uni bar, (param0);
; PTX32-NEXT: } // callseq 1
; PTX32-NEXT: { // callseq 2, 0
; PTX32-NEXT: .param .b32 param0;
-; PTX32-NEXT: st.param.b32 [param0], %r3;
+; PTX32-NEXT: st.param.b32 [param0], %r4;
; PTX32-NEXT: call.uni bar, (param0);
; PTX32-NEXT: } // callseq 2
; PTX32-NEXT: ret;
@@ -167,21 +161,20 @@ define void @foo4() {
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
; PTX64-NEXT: mov.b64 %SPL, __local_depot3;
-; PTX64-NEXT: cvta.local.u64 %SP, %SPL;
-; PTX64-NEXT: add.u64 %rd1, %SP, 0;
-; PTX64-NEXT: add.u64 %rd2, %SPL, 0;
-; PTX64-NEXT: add.u64 %rd3, %SP, 4;
-; PTX64-NEXT: add.u64 %rd4, %SPL, 4;
-; PTX64-NEXT: st.local.b32 [%rd2], 0;
-; PTX64-NEXT: st.local.b32 [%rd4], 0;
+; PTX64-NEXT: add.u64 %rd1, %SPL, 0;
+; PTX64-NEXT: cvta.local.u64 %rd2, %rd1;
+; PTX64-NEXT: add.u64 %rd3, %SPL, 4;
+; PTX64-NEXT: cvta.local.u64 %rd4, %rd3;
+; PTX64-NEXT: st.local.b32 [%SPL], 0;
+; PTX64-NEXT: st.local.b32 [%SPL+4], 0;
; PTX64-NEXT: { // callseq 1, 0
; PTX64-NEXT: .param .b64 param0;
-; PTX64-NEXT: st.param.b64 [param0], %rd1;
+; PTX64-NEXT: st.param.b64 [param0], %rd2;
; PTX64-NEXT: call.uni bar, (param0);
; PTX64-NEXT: } // callseq 1
; PTX64-NEXT: { // callseq 2, 0
; PTX64-NEXT: .param .b64 param0;
-; PTX64-NEXT: st.param.b64 [param0], %rd3;
+; PTX64-NEXT: st.param.b64 [param0], %rd4;
; PTX64-NEXT: call.uni bar, (param0);
; PTX64-NEXT: } // callseq 2
; PTX64-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/lower-alloca.ll b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
index 57c1e5826c89a..7bc90c42ad12f 100644
--- a/llvm/test/CodeGen/NVPTX/lower-alloca.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-alloca.ll
@@ -7,28 +7,28 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "nvptx64-unknown-unknown"
define ptx_kernel void @kernel() {
-; LABEL: @lower_alloca
+; CHECK-LABEL: @kernel
; PTX-LABEL: .visible .entry kernel(
%A = alloca i32
-; CHECK: addrspacecast ptr %A to ptr addrspace(5)
-; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
-; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr %A to ptr addrspace(5)
-; LOWERALLOCAONLY: [[V2:%.*]] = addrspacecast ptr addrspace(5) [[V1]] to ptr
+; CHECK: %A1 = alloca i32, align 4, addrspace(5)
+; CHECK: store i32 0, ptr addrspace(5) %A1
+; LOWERALLOCAONLY: %A1 = alloca i32, align 4, addrspace(5)
+; LOWERALLOCAONLY: [[V2:%.*]] = addrspacecast ptr addrspace(5) %A1 to ptr
; LOWERALLOCAONLY: store i32 0, ptr [[V2]], align 4
-; PTX: st.local.b32 [{{%rd[0-9]+}}], 0
+; PTX: st.local.b32 [%SPL], 0
store i32 0, ptr %A
call void @callee(ptr %A)
ret void
}
define void @alloca_in_explicit_local_as() {
-; LABEL: @lower_alloca_addrspace5
+; CHECK-LABEL: @alloca_in_explicit_local_as
; PTX-LABEL: .visible .func alloca_in_explicit_local_as(
%A = alloca i32, addrspace(5)
-; CHECK: store i32 0, ptr addrspace(5) {{%.+}}
-; PTX: st.local.b32 [%SP], 0
-; LOWERALLOCAONLY: [[V1:%.*]] = addrspacecast ptr addrspace(5) %A to ptr
-; LOWERALLOCAONLY: store i32 0, ptr [[V1]], align 4
+; CHECK: store i32 0, ptr addrspace(5) %A, align 4
+; PTX: st.local.b32 [%SPL], 0
+; LOWERALLOCAONLY: %A = alloca i32, align 4, addrspace(5)
+; LOWERALLOCAONLY: store i32 0, ptr addrspace(5) %A
store i32 0, ptr addrspace(5) %A
call void @callee(ptr addrspace(5) %A)
ret void
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 8adde4ceefbf4..f4a24ab41ba17 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -156,22 +156,21 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
; PTX-NEXT: mov.b64 %SPL, __local_depot4;
-; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: mov.b64 %rd1, multiple_grid_const_escape_param_0;
; PTX-NEXT: ld.param.b32 %r1, [multiple_grid_const_escape_param_1];
; PTX-NEXT: mov.b64 %rd2, multiple_grid_const_escape_param_2;
; PTX-NEXT: cvta.param.u64 %rd3, %rd2;
; PTX-NEXT: cvta.param.u64 %rd4, %rd1;
-; PTX-NEXT: add.u64 %rd5, %SP, 0;
-; PTX-NEXT: add.u64 %rd6, %SPL, 0;
-; PTX-NEXT: st.local.b32 [%rd6], %r1;
+; PTX-NEXT: add.u64 %rd5, %SPL, 0;
+; PTX-NEXT: cvta.local.u64 %rd6, %rd5;
+; PTX-NEXT: st.local.b32 [%SPL], %r1;
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: .param .b64 param1;
; PTX-NEXT: .param .b64 param2;
; PTX-NEXT: .param .b32 retval0;
; PTX-NEXT: st.param.b64 [param2], %rd3;
-; PTX-NEXT: st.param.b64 [param1], %rd5;
+; PTX-NEXT: st.param.b64 [param1], %rd6;
; PTX-NEXT: st.param.b64 [param0], %rd4;
; PTX-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _);
; PTX-NEXT: mov.b64 %rd7, escape3;
diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index 21257e21bea9f..d8a9530e931a6 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -135,24 +135,23 @@ define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out
;
; PTX-LABEL: escape_ptr(
; PTX: {
-; PTX-NEXT: .local .align 4 .b8 __local_depot2[8];
+; PTX-NEXT: .local .align 8 .b8 __local_depot2[8];
; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
-; PTX-NEXT: .reg .b32 %r<3>;
-; PTX-NEXT: .reg .b64 %rd<3>;
+; PTX-NEXT: .reg .b64 %rd<7>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.b64 %SPL, __local_depot2;
-; PTX-NEXT: cvta.local.u64 %SP, %SPL;
-; PTX-NEXT: add.u64 %rd1, %SP, 0;
-; PTX-NEXT: add.u64 %rd2, %SPL, 0;
-; PTX-NEXT: ld.param.b32 %r1, [escape_ptr_param_1+4];
-; PTX-NEXT: st.local.b32 [%rd2+4], %r1;
-; PTX-NEXT: ld.param.b32 %r2, [escape_ptr_param_1];
-; PTX-NEXT: st.local.b32 [%rd2], %r2;
+; PTX-NEXT: add.u64 %rd1, %SPL, 0;
+; PTX-NEXT: cvta.local.u64 %rd2, %rd1;
+; PTX-NEXT: ld.param.b32 %rd3, [escape_ptr_param_1+4];
+; PTX-NEXT: shl.b64 %rd4, %rd3, 32;
+; PTX-NEXT: ld.param.b32 %rd5, [escape_ptr_param_1];
+; PTX-NEXT: or.b64 %rd6, %rd4, %rd5;
+; PTX-NEXT: st.local.b64 [%SPL], %rd6;
; PTX-NEXT: { // callseq 0, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd1;
+; PTX-NEXT: st.param.b64 [param0], %rd2;
; PTX-NEXT: call.uni _Z6escapePv, (param0);
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
@@ -175,25 +174,24 @@ define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone
;
; PTX-LABEL: escape_ptr_gep(
; PTX: {
-; PTX-NEXT: .local .align 4 .b8 __local_depot3[8];
+; PTX-NEXT: .local .align 8 .b8 __local_depot3[8];
; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
-; PTX-NEXT: .reg .b32 %r<3>;
-; PTX-NEXT: .reg .b64 %rd<4>;
+; PTX-NEXT: .reg .b64 %rd<8>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.b64 %SPL, __local_depot3;
-; PTX-NEXT: cvta.local.u64 %SP, %SPL;
-; PTX-NEXT: add.u64 %rd1, %SP, 0;
-; PTX-NEXT: add.u64 %rd2, %SPL, 0;
-; PTX-NEXT: ld.param.b32 %r1, [escape_ptr_gep_param_1+4];
-; PTX-NEXT: st.local.b32 [%rd2+4], %r1;
-; PTX-NEXT: ld.param.b32 %r2, [escape_ptr_gep_param_1];
-; PTX-NEXT: st.local.b32 [%rd2], %r2;
-; PTX-NEXT: add.s64 %rd3, %rd1, 4;
+; PTX-NEXT: add.u64 %rd1, %SPL, 0;
+; PTX-NEXT: cvta.local.u64 %rd2, %rd1;
+; PTX-NEXT: ld.param.b32 %rd3, [escape_ptr_gep_param_1+4];
+; PTX-NEXT: shl.b64 %rd4, %rd3, 32;
+; PTX-NEXT: ld.param.b32 %rd5, [escape_ptr_gep_param_1];
+; PTX-NEXT: or.b64 %rd6, %rd4, %rd5;
+; PTX-NEXT: st.local.b64 [%SPL], %rd6;
+; PTX-NEXT: add.s64 %rd7, %rd2, 4;
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd3;
+; PTX-NEXT: st.param.b64 [param0], %rd7;
; PTX-NEXT: call.uni _Z6escapePv, (param0);
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
@@ -216,24 +214,23 @@ define dso_local ptx_kernel void @escape_ptr_store(ptr nocapture noundef writeon
;
; PTX-LABEL: escape_ptr_store(
; PTX: {
-; PTX-NEXT: .local .align 4 .b8 __local_depot4[8];
+; PTX-NEXT: .local .align 8 .b8 __local_depot4[8];
; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
-; PTX-NEXT: .reg .b32 %r<3>;
-; PTX-NEXT: .reg .b64 %rd<5>;
+; PTX-NEXT: .reg .b64 %rd<9>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.b64 %SPL, __local_depot4;
-; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: ld.param.b64 %rd1, [escape_ptr_store_param_0];
; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
-; PTX-NEXT: add.u64 %rd3, %SP, 0;
-; PTX-NEXT: add.u64 %rd4, %SPL, 0;
-; PTX-NEXT: ld.param.b32 %r1, [escape_ptr_store_param_1+4];
-; PTX-NEXT: st.local.b32 [%rd4+4], %r1;
-; PTX-NEXT: ld.param.b32 %r2, [escape_ptr_store_param_1];
-; PTX-NEXT: st.local.b32 [%rd4], %r2;
-; PTX-NEXT: st.global.b64 [%rd2], %rd3;
+; PTX-NEXT: add.u64 %rd3, %SPL, 0;
+; PTX-NEXT: cvta.local.u64 %rd4, %rd3;
+; PTX-NEXT: ld.param.b32 %rd5, [escape_ptr_store_param_1+4];
+; PTX-NEXT: shl.b64 %rd6, %rd5, 32;
+; PTX-NEXT: ld.param.b32 %rd7, [escape_ptr_store_param_1];
+; PTX-NEXT: or.b64 %rd8, %rd6, %rd7;
+; PTX-NEXT: st.local.b64 [%SPL], %rd8;
+; PTX-NEXT: st.global.b64 [%rd2], %rd4;
; PTX-NEXT: ret;
entry:
store ptr %s, ptr %out, align 8
@@ -254,25 +251,24 @@ define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef wri
;
; PTX-LABEL: escape_ptr_gep_store(
; PTX: {
-; PTX-NEXT: .local .align 4 .b8 __local_depot5[8];
+; PTX-NEXT: .local .align 8 .b8 __local_depot5[8];
; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
-; PTX-NEXT: .reg .b32 %r<3>;
-; PTX-NEXT: .reg .b64 %rd<6>;
+; PTX-NEXT: .reg .b64 %rd<10>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.b64 %SPL, __local_depot5;
-; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: ld.param.b64 %rd1, [escape_ptr_gep_store_param_0];
; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
-; PTX-NEXT: add.u64 %rd3, %SP, 0;
-; PTX-NEXT: add.u64 %rd4, %SPL, 0;
-; PTX-NEXT: ld.param.b32 %r1, [escape_ptr_gep_store_param_1+4];
-; PTX-NEXT: st.local.b32 [%rd4+4], %r1;
-; PTX-NEXT: ld.param.b32 %r2, [escape_ptr_gep_store_param_1];
-; PTX-NEXT: st.local.b32 [%rd4], %r2;
-; PTX-NEXT: add.s64 %rd5, %rd3, 4;
-; PTX-NEXT: st.global.b64 [%rd2], %rd5;
+; PTX-NEXT: add.u64 %rd3, %SPL, 0;
+; PTX-NEXT: cvta.local.u64 %rd4, %rd3;
+; PTX-NEXT: ld.param.b32 %rd5, [escape_ptr_gep_store_param_1+4];
+; PTX-NEXT: shl.b64 %rd6, %rd5, 32;
+; PTX-NEXT: ld.param.b32 %rd7, [escape_ptr_gep_store_param_1];
+; PTX-NEXT: or.b64 %rd8, %rd6, %rd7;
+; PTX-NEXT: st.local.b64 [%SPL], %rd8;
+; PTX-NEXT: add.s64 %rd9, %rd4, 4;
+; PTX-NEXT: st.global.b64 [%rd2], %rd9;
; PTX-NEXT: ret;
entry:
%b = getelementptr inbounds nuw i8, ptr %s, i64 4
@@ -294,24 +290,23 @@ define dso_local ptx_kernel void @escape_ptrtoint(ptr nocapture noundef writeonl
;
; PTX-LABEL: escape_ptrtoint(
; PTX: {
-; PTX-NEXT: .local .align 4 .b8 __local_depot6[8];
+; PTX-NEXT: .local .align 8 .b8 __local_depot6[8];
; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
-; PTX-NEXT: .reg .b32 %r<3>;
-; PTX-NEXT: .reg .b64 %rd<5>;
+; PTX-NEXT: .reg .b64 %rd<9>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.b64 %SPL, __local_depot6;
-; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: ld.param.b64 %rd1, [escape_ptrtoint_param_0];
; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
-; PTX-NEXT: add.u64 %rd3, %SP, 0;
-; PTX-NEXT: add.u64 %rd4, %SPL, 0;
-; PTX-NEXT: ld.param.b32 %r1, [escape_ptrtoint_param_1+4];
-; PTX-NEXT: st.local.b32 [%rd4+4], %r1;
-; PTX-NEXT: ld.param.b32 %r2, [escape_ptrtoint_param_1];
-; PTX-NEXT: st.local.b32 [%rd4], %r2;
-; PTX-NEXT: st.global.b64 [%rd2], %rd3;
+; PTX-NEXT: add.u64 %rd3, %SPL, 0;
+; PTX-NEXT: cvta.local.u64 %rd4, %rd3;
+; PTX-NEXT: ld.param.b32 %rd5, [escape_ptrtoint_param_1+4];
+; PTX-NEXT: shl.b64 %rd6, %rd5, 32;
+; PTX-NEXT: ld.param.b32 %rd7, [escape_ptrtoint_param_1];
+; PTX-NEXT: or.b64 %rd8, %rd6, %rd7;
+; PTX-NEXT: st.local.b64 [%SPL], %rd8;
+; PTX-NEXT: st.global.b64 [%rd2], %rd4;
; PTX-NEXT: ret;
entry:
%i = ptrtoint ptr %s to i64
@@ -455,64 +450,51 @@ define dso_local ptx_kernel void @memcpy_to_param(ptr nocapture noundef readonly
; PTX-NEXT: .local .align 8 .b8 __local_depot9[8];
; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
-; PTX-NEXT: .reg .b32 %r<3>;
-; PTX-NEXT: .reg .b64 %rd<47>;
+; PTX-NEXT: .reg .b16 %rs<17>;
+; PTX-NEXT: .reg .b64 %rd<8>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.b64 %SPL, __local_depot9;
-; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: ld.param.b64 %rd1, [memcpy_to_param_param_0];
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
-; PTX-NEXT: ld.param.b32 %r1, [memcpy_to_param_param_1+4];
-; PTX-NEXT: st.local.b32 [%rd2+4], %r1;
-; PTX-NEXT: ld.param.b32 %r2, [memcpy_to_param_param_1];
-; PTX-NEXT: st.local.b32 [%rd2], %r2;
-; PTX-NEXT: ld.volatile.b8 %rd3, [%rd1];
-; PTX-NEXT: ld.volatile.b8 %rd4, [%rd1+1];
-; PTX-NEXT: shl.b64 %rd5, %rd4, 8;
-; PTX-NEXT: or.b64 %rd6, %rd5, %rd3;
-; PTX-NEXT: ld.volatile.b8 %rd7, [%rd1+2];
-; PTX-NEXT: shl.b64 %rd8, %rd7, 16;
-; PTX-NEXT: ld.volatile.b8 %rd9, [%rd1+3];
-; PTX-NEXT: shl.b64 %rd10, %rd9, 24;
-; PTX-NEXT: or.b64 %rd11, %rd10, %rd8;
-; PTX-NEXT: or.b64 %rd12, %rd11, %rd6;
-; PTX-NEXT: ld.volatile.b8 %rd13, [%rd1+4];
-; PTX-NEXT: ld.volatile.b8 %rd14, [%rd1+5];
-; PTX-NEXT: shl.b64 %rd15, %rd14, 8;
-; PTX-NEXT: or.b64 %rd16, %rd15, %rd13;
-; PTX-NEXT: ld.volatile.b8 %rd17, [%rd1+6];
-; PTX-NEXT: shl.b64 %rd18, %rd17, 16;
-; PTX-NEXT: ld.volatile.b8 %rd19, [%rd1+7];
-; PTX-NEXT: shl.b64 %rd20, %rd19, 24;
-; PTX-NEXT: or.b64 %rd21, %rd20, %rd18;
-; PTX-NEXT: or.b64 %rd22, %rd21, %rd16;
-; PTX-NEXT: shl.b64 %rd23, %rd22, 32;
-; PTX-NEXT: or.b64 %rd24, %rd23, %rd12;
-; PTX-NEXT: st.volatile.b64 [%SP], %rd24;
-; PTX-NEXT: ld.volatile.b8 %rd25, [%rd1+8];
-; PTX-NEXT: ld.volatile.b8 %rd26, [%rd1+9];
-; PTX-NEXT: shl.b64 %rd27, %rd26, 8;
-; PTX-NEXT: or.b64 %rd28, %rd27, %rd25;
-; PTX-NEXT: ld.volatile.b8 %rd29, [%rd1+10];
-; PTX-NEXT: shl.b64 %rd30, %rd29, 16;
-; PTX-NEXT: ld.volatile.b8 %rd31, [%rd1+11];
-; PTX-NEXT: shl.b64 %rd32, %rd31, 24;
-; PTX-NEXT: or.b64 %rd33, %rd32, %rd30;
-; PTX-NEXT: or.b64 %rd34, %rd33, %rd28;
-; PTX-NEXT: ld.volatile.b8 %rd35, [%rd1+12];
-; PTX-NEXT: ld.volatile.b8 %rd36, [%rd1+13];
-; PTX-NEXT: shl.b64 %rd37, %rd36, 8;
-; PTX-NEXT: or.b64 %rd38, %rd37, %rd35;
-; PTX-NEXT: ld.volatile.b8 %rd39, [%rd1+14];
-; PTX-NEXT: shl.b64 %rd40, %rd39, 16;
-; PTX-NEXT: ld.volatile.b8 %rd41, [%rd1+15];
-; PTX-NEXT: shl.b64 %rd42, %rd41, 24;
-; PTX-NEXT: or.b64 %rd43, %rd42, %rd40;
-; PTX-NEXT: or.b64 %rd44, %rd43, %rd38;
-; PTX-NEXT: shl.b64 %rd45, %rd44, 32;
-; PTX-NEXT: or.b64 %rd46, %rd45, %rd34;
-; PTX-NEXT: st.volatile.b64 [%SP+8], %rd46;
+; PTX-NEXT: cvta.local.u64 %rd3, %rd2;
+; PTX-NEXT: ld.param.b32 %rd4, [memcpy_to_param_param_1+4];
+; PTX-NEXT: shl.b64 %rd5, %rd4, 32;
+; PTX-NEXT: ld.param.b32 %rd6, [memcpy_to_param_param_1];
+; PTX-NEXT: or.b64 %rd7, %rd5, %rd6;
+; PTX-NEXT: st.local.b64 [%SPL], %rd7;
+; PTX-NEXT: ld.volatile.b8 %rs1, [%rd1];
+; PTX-NEXT: st.volatile.b8 [%rd3], %rs1;
+; PTX-NEXT: ld.volatile.b8 %rs2, [%rd1+1];
+; PTX-NEXT: st.volatile.b8 [%rd3+1], %rs2;
+; PTX-NEXT: ld.volatile.b8 %rs3, [%rd1+2];
+; PTX-NEXT: st.volatile.b8 [%rd3+2], %rs3;
+; PTX-NEXT: ld.volatile.b8 %rs4, [%rd1+3];
+; PTX-NEXT: st.volatile.b8 [%rd3+3], %rs4;
+; PTX-NEXT: ld.volatile.b8 %rs5, [%rd1+4];
+; PTX-NEXT: st.volatile.b8 [%rd3+4], %rs5;
+; PTX-NEXT: ld.volatile.b8 %rs6, [%rd1+5];
+; PTX-NEXT: st.volatile.b8 [%rd3+5], %rs6;
+; PTX-NEXT: ld.volatile.b8 %rs7, [%rd1+6];
+; PTX-NEXT: st.volatile.b8 [%rd3+6], %rs7;
+; PTX-NEXT: ld.volatile.b8 %rs8, [%rd1+7];
+; PTX-NEXT: st.volatile.b8 [%rd3+7], %rs8;
+; PTX-NEXT: ld.volatile.b8 %rs9, [%rd1+8];
+; PTX-NEXT: st.volatile.b8 [%rd3+8], %rs9;
+; PTX-NEXT: ld.volatile.b8 %rs10, [%rd1+9];
+; PTX-NEXT: st.volatile.b8 [%rd3+9], %rs10;
+; PTX-NEXT: ld.volatile.b8 %rs11, [%rd1+10];
+; PTX-NEXT: st.volatile.b8 [%rd3+10], %rs11;
+; PTX-NEXT: ld.volatile.b8 %rs12, [%rd1+11];
+; PTX-NEXT: st.volatile.b8 [%rd3+11], %rs12;
+; PTX-NEXT: ld.volatile.b8 %rs13, [%rd1+12];
+; PTX-NEXT: st.volatile.b8 [%rd3+12], %rs13;
+; PTX-NEXT: ld.volatile.b8 %rs14, [%rd1+13];
+; PTX-NEXT: st.volatile.b8 [%rd3+13], %rs14;
+; PTX-NEXT: ld.volatile.b8 %rs15, [%rd1+14];
+; PTX-NEXT: st.volatile.b8 [%rd3+14], %rs15;
+; PTX-NEXT: ld.volatile.b8 %rs16, [%rd1+15];
+; PTX-NEXT: st.volatile.b8 [%rd3+15], %rs16;
; PTX-NEXT: ret;
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %s, ptr %in, i64 16, i1 true)
@@ -655,17 +637,16 @@ define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr by
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %bb
; PTX-NEXT: mov.b64 %SPL, __local_depot12;
-; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: ld.param.b8 %rs1, [test_select_write_param_3];
; PTX-NEXT: and.b16 %rs2, %rs1, 1;
; PTX-NEXT: setp.ne.b16 %p1, %rs2, 0;
; PTX-NEXT: ld.param.b32 %r1, [test_select_write_param_1];
-; PTX-NEXT: st.b32 [%SP], %r1;
+; PTX-NEXT: st.local.b32 [%SPL], %r1;
; PTX-NEXT: ld.param.b32 %r2, [test_select_write_param_0];
-; PTX-NEXT: st.b32 [%SP+4], %r2;
-; PTX-NEXT: add.u64 %rd1, %SPL, 4;
-; PTX-NEXT: add.u64 %rd2, %SPL, 0;
-; PTX-NEXT: selp.b64 %rd3, %rd1, %rd2, %p1;
+; PTX-NEXT: st.local.b32 [%SPL+4], %r2;
+; PTX-NEXT: add.u64 %rd1, %SPL, 0;
+; PTX-NEXT: add.u64 %rd2, %SPL, 4;
+; PTX-NEXT: selp.b64 %rd3, %rd2, %rd1, %p1;
; PTX-NEXT: st.local.b32 [%rd3], 1;
; PTX-NEXT: ret;
bb:
@@ -830,25 +811,23 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr
; PTX-NEXT: .reg .pred %p<2>;
; PTX-NEXT: .reg .b16 %rs<3>;
; PTX-NEXT: .reg .b32 %r<3>;
-; PTX-NEXT: .reg .b64 %rd<3>;
+; PTX-NEXT: .reg .b64 %rd<2>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %bb
; PTX-NEXT: mov.b64 %SPL, __local_depot14;
-; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: ld.param.b8 %rs1, [test_phi_write_param_2];
; PTX-NEXT: and.b16 %rs2, %rs1, 1;
; PTX-NEXT: setp.ne.b16 %p1, %rs2, 0;
-; PTX-NEXT: add.u64 %rd1, %SPL, 0;
; PTX-NEXT: ld.param.b32 %r1, [test_phi_write_param_1+4];
-; PTX-NEXT: st.b32 [%SP], %r1;
-; PTX-NEXT: add.u64 %rd2, %SPL, 4;
+; PTX-NEXT: st.local.b32 [%SPL], %r1;
; PTX-NEXT: ld.param.b32 %r2, [test_phi_write_param_0];
-; PTX-NEXT: st.b32 [%SP+4], %r2;
+; PTX-NEXT: st.local.b32 [%SPL+4], %r2;
+; PTX-NEXT: add.u64 %rd1, %SPL, 4;
; PTX-NEXT: @%p1 bra $L__BB14_2;
; PTX-NEXT: // %bb.1: // %second
-; PTX-NEXT: mov.b64 %rd2, %rd1;
+; PTX-NEXT: add.u64 %rd1, %SPL, 0;
; PTX-NEXT: $L__BB14_2: // %merge
-; PTX-NEXT: st.local.b32 [%rd2], 1;
+; PTX-NEXT: st.local.b32 [%rd1], 1;
; PTX-NEXT: ret;
bb:
br i1 %cond, label %first, label %second
@@ -882,13 +861,11 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .b32 %r<2>;
-; PTX-NEXT: .reg .b64 %rd<2>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
; PTX-NEXT: mov.b64 %SPL, __local_depot15;
-; PTX-NEXT: add.u64 %rd1, %SPL, 0;
; PTX-NEXT: ld.param.b32 %r1, [test_forward_byval_arg_param_0];
-; PTX-NEXT: st.local.b32 [%rd1], %r1;
+; PTX-NEXT: st.local.b32 [%SPL], %r1;
; PTX-NEXT: { // callseq 2, 0
; PTX-NEXT: .param .align 4 .b8 param0[4];
; PTX-NEXT: st.param.b32 [param0], %r1;
diff --git a/llvm/test/CodeGen/NVPTX/vaargs.ll b/llvm/test/CodeGen/NVPTX/vaargs.ll
index 9e312a2fec60a..19ed05e28a3d4 100644
--- a/llvm/test/CodeGen/NVPTX/vaargs.ll
+++ b/llvm/test/CodeGen/NVPTX/vaargs.ll
@@ -17,55 +17,55 @@ entry:
; Test va_start
; CHECK: .param .align 8 .b8 foo_vararg[]
; CHECK: mov.b[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], foo_vararg;
-; CHECK-NEXT: st.b[[BITS]] [%SP], [[VA_PTR]];
+; CHECK-NEXT: st.b[[BITS]] [[[SP1:%(r|rd)[0-9]+]]], [[VA_PTR]]
call void @llvm.va_start(ptr %al)
; Test va_copy()
-; CHECK-NEXT: ld.b[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP];
-; CHECK-NEXT: st.b[[BITS]] [%SP+{{[0-9]+}}], [[VA_PTR]];
+; CHECK-NEXT: ld.b[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [[[SP1]]];
+; CHECK-NEXT: st.b[[BITS]] [[[SP2:%(r|rd)[0-9]+]]], [[VA_PTR]];
call void @llvm.va_copy(ptr %al2, ptr %al)
; Test va_arg(ap, int32_t)
-; CHECK-NEXT: ld.b[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP];
+; CHECK-NEXT: ld.b[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [[[SP1]]];
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_TMP:%(r|rd)[0-9]+]], [[VA_PTR]], 3;
; CHECK-NEXT: and.b[[BITS]] [[VA_PTR_ALIGN:%(r|rd)[0-9]+]], [[VA_PTR_TMP]], -4;
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_NEXT:%(r|rd)[0-9]+]], [[VA_PTR_ALIGN]], 4;
-; CHECK-NEXT: st.b[[BITS]] [%SP], [[VA_PTR_NEXT]];
+; CHECK-NEXT: st.b[[BITS]] [[[SP1]]], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.b32 %r{{[0-9]+}}, [[[VA_PTR_ALIGN]]];
%0 = va_arg ptr %al, i32
; Test va_arg(ap, int64_t)
-; CHECK-NEXT: ld.b[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP];
+; CHECK-NEXT: ld.b[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [[[SP1]]];
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_TMP:%(r|rd)[0-9]+]], [[VA_PTR]], 7;
; CHECK-NEXT: and.b[[BITS]] [[VA_PTR_ALIGN:%(r|rd)[0-9]+]], [[VA_PTR_TMP]], -8;
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_NEXT:%(r|rd)[0-9]+]], [[VA_PTR_ALIGN]], 8;
-; CHECK-NEXT: st.b[[BITS]] [%SP], [[VA_PTR_NEXT]];
+; CHECK-NEXT: st.b[[BITS]] [[[SP1]]], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.b64 %rd{{[0-9]+}}, [[[VA_PTR_ALIGN]]];
%1 = va_arg ptr %al, i64
; Test va_arg(ap, double)
-; CHECK-NEXT: ld.b[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP];
+; CHECK-NEXT: ld.b[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [[[SP1]]];
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_TMP:%(r|rd)[0-9]+]], [[VA_PTR]], 7;
; CHECK-NEXT: and.b[[BITS]] [[VA_PTR_ALIGN:%(r|rd)[0-9]+]], [[VA_PTR_TMP]], -8;
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_NEXT:%(r|rd)[0-9]+]], [[VA_PTR_ALIGN]], 8;
-; CHECK-NEXT: st.b[[BITS]] [%SP], [[VA_PTR_NEXT]];
+; CHECK-NEXT: st.b[[BITS]] [[[SP1]]], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.b64 %rd{{[0-9]+}}, [[[VA_PTR_ALIGN]]];
%2 = va_arg ptr %al, double
; Test va_arg(ap, ptr)
-; CHECK-NEXT: ld.b[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP];
+; CHECK-NEXT: ld.b[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [[[SP1]]];
; CHECK32-NEXT: add.s32 [[VA_PTR_TMP:%r[0-9]+]], [[VA_PTR]], 3;
; CHECK64-NEXT: add.s64 [[VA_PTR_TMP:%rd[0-9]+]], [[VA_PTR]], 7;
; CHECK32-NEXT: and.b32 [[VA_PTR_ALIGN:%r[0-9]+]], [[VA_PTR_TMP]], -4;
; CHECK64-NEXT: and.b64 [[VA_PTR_ALIGN:%rd[0-9]+]], [[VA_PTR_TMP]], -8;
; CHECK32-NEXT: add.s32 [[VA_PTR_NEXT:%r[0-9]+]], [[VA_PTR_ALIGN]], 4;
; CHECK64-NEXT: add.s64 [[VA_PTR_NEXT:%rd[0-9]+]], [[VA_PTR_ALIGN]], 8;
-; CHECK-NEXT: st.b[[BITS]] [%SP], [[VA_PTR_NEXT]];
+; CHECK-NEXT: st.b[[BITS]] [[[SP1]]], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.b[[BITS]] %{{(r|rd)[0-9]+}}, [[[VA_PTR_ALIGN]]];
%3 = va_arg ptr %al, ptr
diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
index 890753b6ac5aa..7fe0dda2bf94e 100644
--- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll
+++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
@@ -105,22 +105,22 @@ define dso_local i32 @foo() {
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b32 %r<2>;
-; CHECK-PTX-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX-NEXT: .reg .b64 %rd<3>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot1;
-; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
-; CHECK-PTX-NEXT: st.b64 [%SP], 4294967297;
-; CHECK-PTX-NEXT: st.b32 [%SP+8], 1;
-; CHECK-PTX-NEXT: st.b64 [%SP+16], 1;
-; CHECK-PTX-NEXT: st.b64 [%SP+24], 4607182418800017408;
-; CHECK-PTX-NEXT: st.b64 [%SP+32], 4607182418800017408;
+; CHECK-PTX-NEXT: st.local.b64 [%SPL], 4294967297;
+; CHECK-PTX-NEXT: st.local.b32 [%SPL+8], 1;
+; CHECK-PTX-NEXT: st.local.b64 [%SPL+16], 1;
+; CHECK-PTX-NEXT: st.local.b64 [%SPL+24], 4607182418800017408;
+; CHECK-PTX-NEXT: st.local.b64 [%SPL+32], 4607182418800017408;
+; CHECK-PTX-NEXT: add.u64 %rd1, %SPL, 0;
+; CHECK-PTX-NEXT: cvta.local.u64 %rd2, %rd1;
; CHECK-PTX-NEXT: { // callseq 0, 0
; CHECK-PTX-NEXT: .param .b32 param0;
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: .param .b32 retval0;
-; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd2;
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: call.uni (retval0), variadics1, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
@@ -138,34 +138,34 @@ entry:
define dso_local i32 @variadics2(i32 noundef %first, ...) {
; CHECK-PTX-LABEL: variadics2(
; CHECK-PTX: {
-; CHECK-PTX-NEXT: .local .align 1 .b8 __local_depot2[3];
+; CHECK-PTX-NEXT: .local .align 2 .b8 __local_depot2[4];
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
-; CHECK-PTX-NEXT: .reg .b16 %rs<4>;
+; CHECK-PTX-NEXT: .reg .b16 %rs<6>;
; CHECK-PTX-NEXT: .reg .b32 %r<6>;
-; CHECK-PTX-NEXT: .reg .b64 %rd<8>;
+; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot2;
; CHECK-PTX-NEXT: ld.param.b32 %r1, [variadics2_param_0];
; CHECK-PTX-NEXT: ld.param.b64 %rd1, [variadics2_param_1];
-; CHECK-PTX-NEXT: add.u64 %rd2, %SPL, 0;
-; CHECK-PTX-NEXT: add.s64 %rd3, %rd1, 7;
-; CHECK-PTX-NEXT: and.b64 %rd4, %rd3, -8;
-; CHECK-PTX-NEXT: ld.b32 %r2, [%rd4];
-; CHECK-PTX-NEXT: ld.s8 %r3, [%rd4+4];
-; CHECK-PTX-NEXT: ld.b8 %rs1, [%rd4+7];
-; CHECK-PTX-NEXT: st.local.b8 [%rd2+2], %rs1;
-; CHECK-PTX-NEXT: ld.b8 %rs2, [%rd4+6];
-; CHECK-PTX-NEXT: st.local.b8 [%rd2+1], %rs2;
-; CHECK-PTX-NEXT: ld.b8 %rs3, [%rd4+5];
-; CHECK-PTX-NEXT: st.local.b8 [%rd2], %rs3;
-; CHECK-PTX-NEXT: ld.b64 %rd5, [%rd4+8];
+; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
+; CHECK-PTX-NEXT: and.b64 %rd3, %rd2, -8;
+; CHECK-PTX-NEXT: ld.b32 %r2, [%rd3];
+; CHECK-PTX-NEXT: ld.s8 %r3, [%rd3+4];
+; CHECK-PTX-NEXT: ld.b8 %rs1, [%rd3+7];
+; CHECK-PTX-NEXT: st.local.b8 [%SPL+2], %rs1;
+; CHECK-PTX-NEXT: ld.b8 %rs2, [%rd3+5];
+; CHECK-PTX-NEXT: ld.b8 %rs3, [%rd3+6];
+; CHECK-PTX-NEXT: shl.b16 %rs4, %rs3, 8;
+; CHECK-PTX-NEXT: or.b16 %rs5, %rs4, %rs2;
+; CHECK-PTX-NEXT: st.local.b16 [%SPL], %rs5;
+; CHECK-PTX-NEXT: ld.b64 %rd4, [%rd3+8];
; CHECK-PTX-NEXT: add.s32 %r4, %r1, %r2;
; CHECK-PTX-NEXT: add.s32 %r5, %r4, %r3;
-; CHECK-PTX-NEXT: cvt.u64.u32 %rd6, %r5;
-; CHECK-PTX-NEXT: add.s64 %rd7, %rd6, %rd5;
-; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %rd7;
+; CHECK-PTX-NEXT: cvt.u64.u32 %rd5, %r5;
+; CHECK-PTX-NEXT: add.s64 %rd6, %rd5, %rd4;
+; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %rd6;
; CHECK-PTX-NEXT: ret;
entry:
%vlist = alloca ptr, align 8
@@ -201,28 +201,28 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot3[24];
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
-; CHECK-PTX-NEXT: .reg .b16 %rs<4>;
+; CHECK-PTX-NEXT: .reg .b16 %rs<6>;
; CHECK-PTX-NEXT: .reg .b32 %r<2>;
; CHECK-PTX-NEXT: .reg .b64 %rd<3>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot3;
-; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
-; CHECK-PTX-NEXT: add.u64 %rd1, %SPL, 0;
; CHECK-PTX-NEXT: ld.global.nc.b8 %rs1, [__const_$_bar_$_s1+7];
-; CHECK-PTX-NEXT: st.local.b8 [%rd1+2], %rs1;
+; CHECK-PTX-NEXT: st.local.b8 [%SPL+2], %rs1;
; CHECK-PTX-NEXT: ld.global.nc.b8 %rs2, [__const_$_bar_$_s1+6];
-; CHECK-PTX-NEXT: st.local.b8 [%rd1+1], %rs2;
-; CHECK-PTX-NEXT: ld.global.nc.b8 %rs3, [__const_$_bar_$_s1+5];
-; CHECK-PTX-NEXT: st.local.b8 [%rd1], %rs3;
-; CHECK-PTX-NEXT: st.b32 [%SP+8], 1;
-; CHECK-PTX-NEXT: st.b8 [%SP+12], 1;
-; CHECK-PTX-NEXT: st.b64 [%SP+16], 1;
+; CHECK-PTX-NEXT: shl.b16 %rs3, %rs2, 8;
+; CHECK-PTX-NEXT: ld.global.nc.b8 %rs4, [__const_$_bar_$_s1+5];
+; CHECK-PTX-NEXT: or.b16 %rs5, %rs3, %rs4;
+; CHECK-PTX-NEXT: st.local.b16 [%SPL], %rs5;
+; CHECK-PTX-NEXT: st.local.b32 [%SPL+8], 1;
+; CHECK-PTX-NEXT: st.local.b8 [%SPL+12], 1;
+; CHECK-PTX-NEXT: st.local.b64 [%SPL+16], 1;
+; CHECK-PTX-NEXT: add.u64 %rd1, %SPL, 8;
+; CHECK-PTX-NEXT: cvta.local.u64 %rd2, %rd1;
; CHECK-PTX-NEXT: { // callseq 1, 0
; CHECK-PTX-NEXT: .param .b32 param0;
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: .param .b32 retval0;
-; CHECK-PTX-NEXT: add.u64 %rd2, %SP, 8;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd2;
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: call.uni (retval0), variadics2, (param0, param1);
@@ -283,18 +283,18 @@ define dso_local i32 @baz() {
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b32 %r<2>;
-; CHECK-PTX-NEXT: .reg .b64 %rd<2>;
+; CHECK-PTX-NEXT: .reg .b64 %rd<3>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot5;
-; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
-; CHECK-PTX-NEXT: st.v4.b32 [%SP], {1, 1, 1, 1};
+; CHECK-PTX-NEXT: st.local.v4.b32 [%SPL], {1, 1, 1, 1};
+; CHECK-PTX-NEXT: add.u64 %rd1, %SPL, 0;
+; CHECK-PTX-NEXT: cvta.local.u64 %rd2, %rd1;
; CHECK-PTX-NEXT: { // callseq 2, 0
; CHECK-PTX-NEXT: .param .b32 param0;
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: .param .b32 retval0;
-; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd2;
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: call.uni (retval0), variadics3, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
@@ -348,27 +348,24 @@ define dso_local void @qux() {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot7[24];
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
-; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
+; CHECK-PTX-NEXT: .reg .b64 %rd<5>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot7;
-; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
-; CHECK-PTX-NEXT: add.u64 %rd1, %SPL, 0;
-; CHECK-PTX-NEXT: ld.global.nc.b64 %rd2, [__const_$_qux_$_s+8];
-; CHECK-PTX-NEXT: st.local.b64 [%rd1+8], %rd2;
-; CHECK-PTX-NEXT: ld.global.nc.b64 %rd3, [__const_$_qux_$_s];
-; CHECK-PTX-NEXT: st.local.b64 [%rd1], %rd3;
-; CHECK-PTX-NEXT: st.b64 [%SP+16], 1;
+; CHECK-PTX-NEXT: ld.global.nc.b64 %rd1, [__const_$_qux_$_s+8];
+; CHECK-PTX-NEXT: st.local.b64 [%SPL+8], %rd1;
+; CHECK-PTX-NEXT: ld.global.nc.b64 %rd2, [__const_$_qux_$_s];
+; CHECK-PTX-NEXT: st.local.b64 [%SPL], %rd2;
+; CHECK-PTX-NEXT: st.local.b64 [%SPL+16], 1;
+; CHECK-PTX-NEXT: add.u64 %rd3, %SPL, 16;
+; CHECK-PTX-NEXT: cvta.local.u64 %rd4, %rd3;
; CHECK-PTX-NEXT: { // callseq 3, 0
; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16];
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: .param .b32 retval0;
-; CHECK-PTX-NEXT: add.u64 %rd4, %SP, 16;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd4;
-; CHECK-PTX-NEXT: ld.local.b64 %rd5, [%rd1+8];
-; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd5;
-; CHECK-PTX-NEXT: ld.local.b64 %rd6, [%rd1];
-; CHECK-PTX-NEXT: st.param.b64 [param0], %rd6;
+; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd1;
+; CHECK-PTX-NEXT: st.param.b64 [param0], %rd2;
; CHECK-PTX-NEXT: call.uni (retval0), variadics4, (param0, param1);
; CHECK-PTX-NEXT: } // callseq 3
; CHECK-PTX-NEXT: ret;
diff --git a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
index fa42481016540..5d9b4ee6f5297 100644
--- a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
+++ b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
@@ -9,8 +9,7 @@
; CHECK: .loc 1 5 3 // t.c:5:3
; CHECK: { // callseq 0, 0
; CHECK: .param .b64 param0;
-; CHECK: add.u64 %rd1, %SP, 0;
-; CHECK: st.param.b64 [param0], %rd1;
+; CHECK: st.param.b64 [param0], %rd2;
; CHECK: call.uni escape_foo, (param0);
; CHECK: } // callseq 0
; CHECK: .loc 1 6 1 // t.c:6:1
diff --git a/llvm/test/DebugInfo/NVPTX/dbg-value-const-byref.ll b/llvm/test/DebugInfo/NVPTX/dbg-value-const-byref.ll
index 41734f33213e8..ffab7a1b6cfc6 100644
--- a/llvm/test/DebugInfo/NVPTX/dbg-value-const-byref.ll
+++ b/llvm/test/DebugInfo/NVPTX/dbg-value-const-byref.ll
@@ -22,7 +22,7 @@
; CHECK: DEBUG_VALUE: foo:i <- 3
; CHECK: DEBUG_VALUE: foo:i <- 7
; CHECK: DEBUG_VALUE: foo:i <- %
-; CHECK: DEBUG_VALUE: foo:i <- [DW_OP_deref] $vrdepot
+; CHECK: DEBUG_VALUE: foo:i <- [DW_OP_deref] %
; Function Attrs: nounwind ssp uwtable
define i32 @foo() #0 !dbg !4 {
>From 7eb8b83a4a30550ee5ded74f29660f83d5dabc92 Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Thu, 21 Aug 2025 18:43:11 +0000
Subject: [PATCH 02/14] format
---
llvm/lib/IR/Verifier.cpp | 2 +-
llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp | 2 +-
llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp | 3 ++-
llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 3 ++-
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 8 +++++---
5 files changed, 11 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 2afa2bda53b52..2a4d4bfa1a4c7 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -114,13 +114,13 @@
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
-#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ModRef.h"
+#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 03412edb9e23c..4477be9b65004 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -24,7 +24,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/NVPTXAddrSpace.h"
#include "NVPTX.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
@@ -34,6 +33,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
+#include "llvm/Support/NVPTXAddrSpace.h"
using namespace llvm;
using namespace NVPTXAS;
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 2b18ca9dd774a..050ba38b7fcd2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -529,7 +529,8 @@ void copyByValParam(Function &F, Argument &Arg) {
// the use of the byval parameter with this alloca instruction.
AllocA->setAlignment(
Arg.getParamAlign().value_or(DL.getPrefTypeAlign(StructType)));
- auto *AddressSpaceCast = IRB.CreateAddrSpaceCast(AllocA, Arg.getType(), Arg.getName());
+ auto *AddressSpaceCast =
+ IRB.CreateAddrSpaceCast(AllocA, Arg.getType(), Arg.getName());
Arg.replaceAllUsesWith(AddressSpaceCast);
CallInst *ArgInParam = createNVVMInternalAddrspaceWrap(IRB, Arg);
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 0c56caeadcffe..04eb095f31180 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -119,7 +119,8 @@ bool NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum + 1).getImm();
// Using I0 as the frame pointer
- MI.getOperand(FIOperandNum).ChangeToRegister(getFrameLocalRegister(MF), false);
+ MI.getOperand(FIOperandNum)
+ .ChangeToRegister(getFrameLocalRegister(MF), false);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
return false;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 209dd51a44a61..0874aec69fb40 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -507,7 +507,8 @@ void NVPTXPassConfig::addMachineSSAOptimization() {
bool NVPTXTargetMachine::isCompatibleDataLayout(
const DataLayout &Candidate) const {
- //XXX: Should we enforce that the Candidate DataLayout has the same address space for allocas?
+ // XXX: Should we enforce that the Candidate DataLayout has the same address
+ // space for allocas?
if (DL == Candidate)
return true;
@@ -520,9 +521,10 @@ bool NVPTXTargetMachine::isCompatibleDataLayout(
return NewDL == Candidate;
}
-unsigned NVPTXTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
+unsigned
+NVPTXTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
if (Kind == PseudoSourceValue::FixedStack) {
return ADDRESS_SPACE_LOCAL;
}
return CodeGenTargetMachineImpl::getAddressSpaceForPseudoSourceKind(Kind);
-}
+}
\ No newline at end of file
>From 1c642db0d6fae1f2216ecdca71e662e623fa6bc4 Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Fri, 22 Aug 2025 10:19:18 +0000
Subject: [PATCH 03/14] Function -> Module Pass
---
llvm/lib/Target/NVPTX/NVPTX.h | 2 +-
llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp | 28 ++++++++++++----------
2 files changed, 16 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 77a0e03d4075a..b91af4d5a6305 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -48,7 +48,7 @@ MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
FunctionPass *createNVPTXLowerArgsPass();
-FunctionPass *createNVPTXLowerAllocaPass();
+ModulePass *createNVPTXLowerAllocaPass();
FunctionPass *createNVPTXLowerUnreachablePass(bool TrapUnreachable,
bool NoTrapAfterNoreturn);
FunctionPass *createNVPTXTagInvariantLoadsPass();
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 4477be9b65004..100d82219512e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -39,13 +39,14 @@ using namespace llvm;
using namespace NVPTXAS;
namespace {
-class NVPTXLowerAlloca : public FunctionPass {
- bool runOnFunction(Function &F) override;
- bool doInitialization(Module &M) override;
+class NVPTXLowerAlloca : public ModulePass {
+ bool changeDataLayout(Module &M);
+ bool lowerFunctionAllocas(Function &F);
public:
- static char ID; // Pass identification, replacement for typeid
- NVPTXLowerAlloca() : FunctionPass(ID) {}
+ static char ID;
+ NVPTXLowerAlloca() : ModulePass(ID) {}
+ bool runOnModule(Module &M) override;
StringRef getPassName() const override {
return "convert address space of alloca'ed memory to local";
}
@@ -57,13 +58,14 @@ char NVPTXLowerAlloca::ID = 1;
INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca", "Lower Alloca", false,
false)
-// =============================================================================
-// Main function for this pass.
-// =============================================================================
-bool NVPTXLowerAlloca::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
+bool NVPTXLowerAlloca::runOnModule(Module &M) {
+ bool Changed = changeDataLayout(M);
+ for (auto &F : M)
+ Changed |= lowerFunctionAllocas(F);
+ return Changed;
+}
+bool NVPTXLowerAlloca::lowerFunctionAllocas(Function &F) {
SmallVector<AllocaInst *, 16> Allocas;
for (auto &BB : F)
for (auto &I : BB)
@@ -103,7 +105,7 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
return true;
}
-bool NVPTXLowerAlloca::doInitialization(Module &M) {
+bool NVPTXLowerAlloca::changeDataLayout(Module &M) {
const auto &DL = M.getDataLayout();
if (DL.getAllocaAddrSpace() == ADDRESS_SPACE_LOCAL)
return false;
@@ -115,6 +117,6 @@ bool NVPTXLowerAlloca::doInitialization(Module &M) {
return true;
}
-FunctionPass *llvm::createNVPTXLowerAllocaPass() {
+ModulePass *llvm::createNVPTXLowerAllocaPass() {
return new NVPTXLowerAlloca();
}
>From dd554137b792a7c70e83adc5918241877b6d5180 Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Fri, 22 Aug 2025 10:23:16 +0000
Subject: [PATCH 04/14] assert -> fatal_error
---
llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 100d82219512e..f4204294fe37e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -33,6 +33,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/NVPTXAddrSpace.h"
using namespace llvm;
@@ -44,7 +45,7 @@ class NVPTXLowerAlloca : public ModulePass {
bool lowerFunctionAllocas(Function &F);
public:
- static char ID;
+ static char ID;
NVPTXLowerAlloca() : ModulePass(ID) {}
bool runOnModule(Module &M) override;
StringRef getPassName() const override {
@@ -112,7 +113,8 @@ bool NVPTXLowerAlloca::changeDataLayout(Module &M) {
auto DLStr = DL.getStringRepresentation();
auto AddrSpaceStr = "A" + std::to_string(ADDRESS_SPACE_LOCAL);
- assert(!StringRef(DLStr).contains("A") && "DataLayout should not contain A");
+ if (StringRef(DLStr).contains("A"))
+ report_fatal_error("DataLayout should not contain A");
M.setDataLayout(DLStr.empty() ? AddrSpaceStr : DLStr + "-" + AddrSpaceStr);
return true;
}
>From 94ee8c629b92a1a5a7e8d1a260608064b1192c70 Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Fri, 22 Aug 2025 10:37:30 +0000
Subject: [PATCH 05/14] Remove SP
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 12 +++++-------
llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp | 16 +++-------------
llvm/test/CodeGen/NVPTX/f32x2-instructions.ll | 1 -
llvm/test/CodeGen/NVPTX/indirect_byval.ll | 2 --
llvm/test/CodeGen/NVPTX/local-stack-frame.ll | 8 --------
.../CodeGen/NVPTX/lower-args-gridconstant.ll | 1 -
llvm/test/CodeGen/NVPTX/lower-byval-args.ll | 9 ---------
llvm/test/CodeGen/NVPTX/variadics-backend.ll | 5 -----
8 files changed, 8 insertions(+), 46 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 8528cb41d9244..9ceabcbdd9c88 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -80,6 +80,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -1485,13 +1486,10 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
<< DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
if (static_cast<const NVPTXTargetMachine &>(MF.getTarget())
- .getPointerSize(ADDRESS_SPACE_LOCAL) == 8) {
- O << "\t.reg .b64 \t%SP;\n"
- << "\t.reg .b64 \t%SPL;\n";
- } else {
- O << "\t.reg .b32 \t%SP;\n"
- << "\t.reg .b32 \t%SPL;\n";
- }
+ .getPointerSize(ADDRESS_SPACE_LOCAL) == 8)
+ O << "\t.reg .b64 \t%SPL;\n";
+ else
+ O << "\t.reg .b32 \t%SPL;\n";
}
// Go through all virtual registers to establish the mapping between the
diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 78f18a93b869b..6e4817f09db6c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -46,21 +46,11 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
// Emits
// mov %SPL, %depot;
- // cvta.local %SP, %SPL;
// for local address accesses in MF.
- bool Is64Bit = static_cast<const NVPTXTargetMachine &>(MF.getTarget())
- .getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8;
- unsigned CvtaLocalOpcode =
- (Is64Bit ? NVPTX::cvta_local_64 : NVPTX::cvta_local);
+ bool IsLocal64Bit = static_cast<const NVPTXTargetMachine &>(MF.getTarget())
+ .getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8;
unsigned MovDepotOpcode =
- (Is64Bit ? NVPTX::MOV_DEPOT_ADDR_64 : NVPTX::MOV_DEPOT_ADDR);
- if (!MR.use_empty(NRI->getFrameRegister(MF))) {
- // If %SP is not used, do not bother emitting "cvta.local %SP, %SPL".
- MBBI = BuildMI(MBB, MBBI, dl,
- MF.getSubtarget().getInstrInfo()->get(CvtaLocalOpcode),
- NRI->getFrameRegister(MF))
- .addReg(NRI->getFrameLocalRegister(MF));
- }
+ (IsLocal64Bit ? NVPTX::MOV_DEPOT_ADDR_64 : NVPTX::MOV_DEPOT_ADDR);
if (!MR.use_empty(NRI->getFrameLocalRegister(MF))) {
BuildMI(MBB, MBBI, dl,
MF.getSubtarget().getInstrInfo()->get(MovDepotOpcode),
diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
index cf00c2c9eaa3f..9b870c731cd0d 100644
--- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
@@ -79,7 +79,6 @@ define float @test_extract_i(<2 x float> %a, i64 %idx) #0 {
; CHECK-NOF32X2-LABEL: test_extract_i(
; CHECK-NOF32X2: {
; CHECK-NOF32X2-NEXT: .local .align 8 .b8 __local_depot3[8];
-; CHECK-NOF32X2-NEXT: .reg .b64 %SP;
; CHECK-NOF32X2-NEXT: .reg .b64 %SPL;
; CHECK-NOF32X2-NEXT: .reg .b32 %r<4>;
; CHECK-NOF32X2-NEXT: .reg .b64 %rd<6>;
diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
index 39813efef9b64..a27706854222d 100644
--- a/llvm/test/CodeGen/NVPTX/indirect_byval.ll
+++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
@@ -13,7 +13,6 @@ define internal i32 @foo() {
; CHECK-LABEL: foo(
; CHECK: {
; CHECK-NEXT: .local .align 1 .b8 __local_depot0[2];
-; CHECK-NEXT: .reg .b64 %SP;
; CHECK-NEXT: .reg .b64 %SPL;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<2>;
@@ -50,7 +49,6 @@ define internal i32 @bar() {
; CHECK: // @bar
; CHECK-NEXT: {
; CHECK-NEXT: .local .align 8 .b8 __local_depot1[16];
-; CHECK-NEXT: .reg .b64 %SP;
; CHECK-NEXT: .reg .b64 %SPL;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<5>;
diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
index 3e6f5e37aa85f..35bfde0dc2202 100644
--- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -10,7 +10,6 @@ define void @foo(i32 %a) {
; PTX32-LABEL: foo(
; PTX32: {
; PTX32-NEXT: .local .align 4 .b8 __local_depot0[4];
-; PTX32-NEXT: .reg .b32 %SP;
; PTX32-NEXT: .reg .b32 %SPL;
; PTX32-NEXT: .reg .b32 %r<2>;
; PTX32-EMPTY:
@@ -23,7 +22,6 @@ define void @foo(i32 %a) {
; PTX64-LABEL: foo(
; PTX64: {
; PTX64-NEXT: .local .align 4 .b8 __local_depot0[4];
-; PTX64-NEXT: .reg .b64 %SP;
; PTX64-NEXT: .reg .b64 %SPL;
; PTX64-NEXT: .reg .b32 %r<2>;
; PTX64-EMPTY:
@@ -41,7 +39,6 @@ define ptx_kernel void @foo2(i32 %a) {
; PTX32-LABEL: foo2(
; PTX32: {
; PTX32-NEXT: .local .align 4 .b8 __local_depot1[4];
-; PTX32-NEXT: .reg .b32 %SP;
; PTX32-NEXT: .reg .b32 %SPL;
; PTX32-NEXT: .reg .b32 %r<4>;
; PTX32-EMPTY:
@@ -61,7 +58,6 @@ define ptx_kernel void @foo2(i32 %a) {
; PTX64-LABEL: foo2(
; PTX64: {
; PTX64-NEXT: .local .align 4 .b8 __local_depot1[4];
-; PTX64-NEXT: .reg .b64 %SP;
; PTX64-NEXT: .reg .b64 %SPL;
; PTX64-NEXT: .reg .b32 %r<2>;
; PTX64-NEXT: .reg .b64 %rd<3>;
@@ -90,7 +86,6 @@ define void @foo3(i32 %a) {
; PTX32-LABEL: foo3(
; PTX32: {
; PTX32-NEXT: .local .align 4 .b8 __local_depot2[12];
-; PTX32-NEXT: .reg .b32 %SP;
; PTX32-NEXT: .reg .b32 %SPL;
; PTX32-NEXT: .reg .b32 %r<5>;
; PTX32-EMPTY:
@@ -106,7 +101,6 @@ define void @foo3(i32 %a) {
; PTX64-LABEL: foo3(
; PTX64: {
; PTX64-NEXT: .local .align 4 .b8 __local_depot2[12];
-; PTX64-NEXT: .reg .b64 %SP;
; PTX64-NEXT: .reg .b64 %SPL;
; PTX64-NEXT: .reg .b32 %r<2>;
; PTX64-NEXT: .reg .b64 %rd<3>;
@@ -128,7 +122,6 @@ define void @foo4() {
; PTX32-LABEL: foo4(
; PTX32: {
; PTX32-NEXT: .local .align 4 .b8 __local_depot3[8];
-; PTX32-NEXT: .reg .b32 %SP;
; PTX32-NEXT: .reg .b32 %SPL;
; PTX32-NEXT: .reg .b32 %r<5>;
; PTX32-EMPTY:
@@ -155,7 +148,6 @@ define void @foo4() {
; PTX64-LABEL: foo4(
; PTX64: {
; PTX64-NEXT: .local .align 4 .b8 __local_depot3[8];
-; PTX64-NEXT: .reg .b64 %SP;
; PTX64-NEXT: .reg .b64 %SPL;
; PTX64-NEXT: .reg .b64 %rd<5>;
; PTX64-EMPTY:
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index f4a24ab41ba17..2cfca3f6b3174 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -149,7 +149,6 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-LABEL: multiple_grid_const_escape(
; PTX: {
; PTX-NEXT: .local .align 4 .b8 __local_depot4[4];
-; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .b32 %r<2>;
; PTX-NEXT: .reg .b64 %rd<8>;
diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index d8a9530e931a6..246eaf8a55538 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -136,7 +136,6 @@ define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out
; PTX-LABEL: escape_ptr(
; PTX: {
; PTX-NEXT: .local .align 8 .b8 __local_depot2[8];
-; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .b64 %rd<7>;
; PTX-EMPTY:
@@ -175,7 +174,6 @@ define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone
; PTX-LABEL: escape_ptr_gep(
; PTX: {
; PTX-NEXT: .local .align 8 .b8 __local_depot3[8];
-; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .b64 %rd<8>;
; PTX-EMPTY:
@@ -215,7 +213,6 @@ define dso_local ptx_kernel void @escape_ptr_store(ptr nocapture noundef writeon
; PTX-LABEL: escape_ptr_store(
; PTX: {
; PTX-NEXT: .local .align 8 .b8 __local_depot4[8];
-; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .b64 %rd<9>;
; PTX-EMPTY:
@@ -252,7 +249,6 @@ define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef wri
; PTX-LABEL: escape_ptr_gep_store(
; PTX: {
; PTX-NEXT: .local .align 8 .b8 __local_depot5[8];
-; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .b64 %rd<10>;
; PTX-EMPTY:
@@ -291,7 +287,6 @@ define dso_local ptx_kernel void @escape_ptrtoint(ptr nocapture noundef writeonl
; PTX-LABEL: escape_ptrtoint(
; PTX: {
; PTX-NEXT: .local .align 8 .b8 __local_depot6[8];
-; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .b64 %rd<9>;
; PTX-EMPTY:
@@ -448,7 +443,6 @@ define dso_local ptx_kernel void @memcpy_to_param(ptr nocapture noundef readonly
; PTX-LABEL: memcpy_to_param(
; PTX: {
; PTX-NEXT: .local .align 8 .b8 __local_depot9[8];
-; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .b16 %rs<17>;
; PTX-NEXT: .reg .b64 %rd<8>;
@@ -628,7 +622,6 @@ define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr by
; PTX-LABEL: test_select_write(
; PTX: {
; PTX-NEXT: .local .align 4 .b8 __local_depot12[8];
-; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .pred %p<2>;
; PTX-NEXT: .reg .b16 %rs<3>;
@@ -806,7 +799,6 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr
; PTX-LABEL: test_phi_write(
; PTX: {
; PTX-NEXT: .local .align 4 .b8 __local_depot14[8];
-; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .pred %p<2>;
; PTX-NEXT: .reg .b16 %rs<3>;
@@ -858,7 +850,6 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
; PTX-LABEL: test_forward_byval_arg(
; PTX: {
; PTX-NEXT: .local .align 4 .b8 __local_depot15[4];
-; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .b32 %r<2>;
; PTX-EMPTY:
diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
index 7fe0dda2bf94e..e17d7d0b28269 100644
--- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll
+++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
@@ -102,7 +102,6 @@ define dso_local i32 @foo() {
; CHECK-PTX-LABEL: foo(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot1[40];
-; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b32 %r<2>;
; CHECK-PTX-NEXT: .reg .b64 %rd<3>;
@@ -139,7 +138,6 @@ define dso_local i32 @variadics2(i32 noundef %first, ...) {
; CHECK-PTX-LABEL: variadics2(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 2 .b8 __local_depot2[4];
-; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<6>;
; CHECK-PTX-NEXT: .reg .b32 %r<6>;
@@ -199,7 +197,6 @@ define dso_local i32 @bar() {
; CHECK-PTX-LABEL: bar(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot3[24];
-; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<6>;
; CHECK-PTX-NEXT: .reg .b32 %r<2>;
@@ -280,7 +277,6 @@ define dso_local i32 @baz() {
; CHECK-PTX-LABEL: baz(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 16 .b8 __local_depot5[16];
-; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b32 %r<2>;
; CHECK-PTX-NEXT: .reg .b64 %rd<3>;
@@ -346,7 +342,6 @@ define dso_local void @qux() {
; CHECK-PTX-LABEL: qux(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot7[24];
-; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b64 %rd<5>;
; CHECK-PTX-EMPTY:
>From b37221cf6d3e53d914143106fb9e968717bd671f Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Fri, 22 Aug 2025 10:40:02 +0000
Subject: [PATCH 06/14] Fix pointer size query
---
llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 04eb095f31180..709a5832bb784 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -128,7 +128,7 @@ bool NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Register NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const NVPTXTargetMachine &TM =
static_cast<const NVPTXTargetMachine &>(MF.getTarget());
- return TM.getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8
+ return TM.getPointerSize(NVPTXAS::ADDRESS_SPACE_GENERIC) == 8
? NVPTX::VRFrame64
: NVPTX::VRFrame32;
}
>From 4a15fd4f4856fb98cf040623d90c6e8e90c427e9 Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Fri, 22 Aug 2025 12:30:02 +0000
Subject: [PATCH 07/14] [clang] modify backend datalayout check
---
clang/lib/CodeGen/BackendUtil.cpp | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 0b8b824fbcd5a..8198f9c58340c 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1444,15 +1444,16 @@ void clang::emitBackendOutput(CompilerInstance &CI, CodeGenOptions &CGOpts,
// Verify clang's TargetInfo DataLayout against the LLVM TargetMachine's
// DataLayout.
- if (AsmHelper.TM) {
- std::string DLDesc = M->getDataLayout().getStringRepresentation();
- if (DLDesc != TDesc) {
+ if (AsmHelper.TM)
+ if (!AsmHelper.TM->isCompatibleDataLayout(M->getDataLayout()) ||
+ !AsmHelper.TM->isCompatibleDataLayout(DataLayout(TDesc))) {
+ std::string DLDesc = M->getDataLayout().getStringRepresentation();
unsigned DiagID = Diags.getCustomDiagID(
- DiagnosticsEngine::Error, "backend data layout '%0' does not match "
- "expected target description '%1'");
+ DiagnosticsEngine::Error,
+ "backend data layout '%0' is not compatible with "
+ "expected target description '%1'");
Diags.Report(DiagID) << DLDesc << TDesc;
}
- }
}
// With -fembed-bitcode, save a copy of the llvm IR as data in the
>From edcfce7a7e84cf21b989e7b3ed0bc1d013e22910 Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Mon, 25 Aug 2025 14:10:56 +0000
Subject: [PATCH 08/14] Address comments
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 9 +++----
llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp | 4 +--
llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp | 28 ++++++++++----------
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 27 +++++++------------
4 files changed, 29 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 9ceabcbdd9c88..6981e664f2647 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1484,12 +1484,9 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
int64_t NumBytes = MFI.getStackSize();
if (NumBytes) {
O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
- << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
- if (static_cast<const NVPTXTargetMachine &>(MF.getTarget())
- .getPointerSize(ADDRESS_SPACE_LOCAL) == 8)
- O << "\t.reg .b64 \t%SPL;\n";
- else
- O << "\t.reg .b32 \t%SPL;\n";
+ << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n"
+ << "\t.reg .b" << MF.getTarget().getPointerSizeInBits(ADDRESS_SPACE_LOCAL)
+ << " \t%SPL;\n";
}
// Go through all virtual registers to establish the mapping between the
diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index 6e4817f09db6c..a98ee64f34eeb 100644
--- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -47,8 +47,8 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
// Emits
// mov %SPL, %depot;
// for local address accesses in MF.
- bool IsLocal64Bit = static_cast<const NVPTXTargetMachine &>(MF.getTarget())
- .getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8;
+ bool IsLocal64Bit =
+ MF.getTarget().getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8;
unsigned MovDepotOpcode =
(IsLocal64Bit ? NVPTX::MOV_DEPOT_ADDR_64 : NVPTX::MOV_DEPOT_ADDR);
if (!MR.use_empty(NRI->getFrameLocalRegister(MF))) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index f4204294fe37e..768e17654b195 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
@@ -68,33 +69,32 @@ bool NVPTXLowerAlloca::runOnModule(Module &M) {
bool NVPTXLowerAlloca::lowerFunctionAllocas(Function &F) {
SmallVector<AllocaInst *, 16> Allocas;
- for (auto &BB : F)
- for (auto &I : BB)
- if (auto *Alloca = dyn_cast<AllocaInst>(&I))
- if (Alloca->getAddressSpace() != ADDRESS_SPACE_LOCAL)
- Allocas.push_back(Alloca);
+ for (auto &I : instructions(F))
+ if (auto *Alloca = dyn_cast<AllocaInst>(&I))
+ if (Alloca->getAddressSpace() != ADDRESS_SPACE_LOCAL)
+ Allocas.push_back(Alloca);
if (Allocas.empty())
return false;
+ IRBuilder<> Builder(F.getContext());
for (AllocaInst *Alloca : Allocas) {
- auto *NewAlloca = new AllocaInst(
- Alloca->getAllocatedType(), ADDRESS_SPACE_LOCAL, Alloca->getArraySize(),
- Alloca->getAlign(), Alloca->getName());
- auto *Cast = new AddrSpaceCastInst(
+ Builder.SetInsertPoint(Alloca);
+ auto *NewAlloca =
+ Builder.CreateAlloca(Alloca->getAllocatedType(), ADDRESS_SPACE_LOCAL,
+ Alloca->getArraySize(), Alloca->getName());
+ NewAlloca->setAlignment(Alloca->getAlign());
+ auto *Cast = Builder.CreateAddrSpaceCast(
NewAlloca,
PointerType::get(Alloca->getAllocatedType()->getContext(),
ADDRESS_SPACE_GENERIC),
"");
- Cast->insertBefore(Alloca->getIterator());
- NewAlloca->insertBefore(Cast->getIterator());
for (auto &U : llvm::make_early_inc_range(Alloca->uses())) {
auto *II = dyn_cast<IntrinsicInst>(U.getUser());
- if (!II || (II->getIntrinsicID() != Intrinsic::lifetime_start &&
- II->getIntrinsicID() != Intrinsic::lifetime_end))
+ if (!II || !II->isLifetimeStartOrEnd())
continue;
- IRBuilder<> Builder(II);
+ Builder.SetInsertPoint(II);
Builder.CreateIntrinsic(II->getIntrinsicID(), {NewAlloca->getType()},
{NewAlloca});
II->eraseFromParent();
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 0874aec69fb40..01b2f874da1d4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -216,9 +216,6 @@ class NVPTXPassConfig : public TargetPassConfig {
// function is only called in opt mode.
void addEarlyCSEOrGVNPass();
- // Add passes that propagate special memory spaces.
- void addAddressSpaceInferencePasses();
-
// Add passes that perform straight-line scalar optimizations.
void addStraightLineScalarOptimizationPasses();
};
@@ -304,17 +301,6 @@ void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
addPass(createEarlyCSEPass());
}
-void NVPTXPassConfig::addAddressSpaceInferencePasses() {
- // NVPTXLowerArgs emits alloca for byval parameters which can often
- // be eliminated by SROA.
- addPass(createSROAPass());
- addPass(createNVPTXLowerAllocaPass());
- // TODO: Consider running InferAddressSpaces during opt, earlier in the
- // compilation flow.
- addPass(createInferAddressSpacesPass());
- addPass(createNVPTXAtomicLowerPass());
-}
-
void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
addPass(createSeparateConstOffsetFromGEPPass());
addPass(createSpeculativeExecutionPass());
@@ -368,11 +354,18 @@ void NVPTXPassConfig::addIRPasses() {
// NVPTXLowerArgs is required for correctness and should be run right
// before the address space inference passes.
addPass(createNVPTXLowerArgsPass());
+
+ if (getOptLevel() != CodeGenOptLevel::None)
+ // NVPTXLowerArgs emits alloca for byval parameters which can often
+ // be eliminated by SROA.
+ addPass(createSROAPass());
+ addPass(createNVPTXLowerAllocaPass());
if (getOptLevel() != CodeGenOptLevel::None) {
- addAddressSpaceInferencePasses();
+ // TODO: Consider running InferAddressSpaces during opt, earlier in the
+ // compilation flow.
+ addPass(createInferAddressSpacesPass());
+ addPass(createNVPTXAtomicLowerPass());
addStraightLineScalarOptimizationPasses();
- } else {
- addPass(createNVPTXLowerAllocaPass());
}
addPass(createAtomicExpandLegacyPass());
>From b263b5bfc5de2886786cebd1dfc4b076b0922330 Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Mon, 25 Aug 2025 16:31:16 +0000
Subject: [PATCH 09/14] Fix Debug Info updating
---
llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp | 5 +++++
llvm/test/DebugInfo/NVPTX/dbg-value-const-byref.ll | 2 +-
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index 768e17654b195..2f5db7b9847c0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -26,6 +26,7 @@
#include "NVPTX.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
@@ -99,6 +100,10 @@ bool NVPTXLowerAlloca::lowerFunctionAllocas(Function &F) {
{NewAlloca});
II->eraseFromParent();
}
+ SmallVector<DbgVariableRecord *, 4> DbgVariableUses;
+ findDbgValues(Alloca, DbgVariableUses);
+ for (auto *Dbg : DbgVariableUses)
+ Dbg->replaceVariableLocationOp(Alloca, NewAlloca);
Alloca->replaceAllUsesWith(Cast);
Alloca->eraseFromParent();
diff --git a/llvm/test/DebugInfo/NVPTX/dbg-value-const-byref.ll b/llvm/test/DebugInfo/NVPTX/dbg-value-const-byref.ll
index ffab7a1b6cfc6..4ec3e056e0024 100644
--- a/llvm/test/DebugInfo/NVPTX/dbg-value-const-byref.ll
+++ b/llvm/test/DebugInfo/NVPTX/dbg-value-const-byref.ll
@@ -22,7 +22,7 @@
; CHECK: DEBUG_VALUE: foo:i <- 3
; CHECK: DEBUG_VALUE: foo:i <- 7
; CHECK: DEBUG_VALUE: foo:i <- %
-; CHECK: DEBUG_VALUE: foo:i <- [DW_OP_deref] %
+; CHECK: DEBUG_VALUE: foo:i <- [DW_OP_deref] $vrdepot
; Function Attrs: nounwind ssp uwtable
define i32 @foo() #0 !dbg !4 {
>From 14df7dcc9d0d637237fd2f570884b235b4a076ca Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Tue, 26 Aug 2025 14:35:12 +0000
Subject: [PATCH 10/14] Remove NVPTXISD::DYNAMIC_STACKALLOC
---
llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 23 +-------------
llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 1 -
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 33 +++++++++++++--------
3 files changed, 22 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index f35407ed99106..b22294ba52736 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1106,7 +1106,6 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(NVPTXISD::FMINNUM3)
MAKE_CASE(NVPTXISD::FMAXIMUM3)
MAKE_CASE(NVPTXISD::FMINIMUM3)
- MAKE_CASE(NVPTXISD::DYNAMIC_STACKALLOC)
MAKE_CASE(NVPTXISD::STACKRESTORE)
MAKE_CASE(NVPTXISD::STACKSAVE)
MAKE_CASE(NVPTXISD::SETP_F16X2)
@@ -1771,10 +1770,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
-
if (STI.getPTXVersion() < 73 || STI.getSmVersion() < 52) {
const Function &Fn = DAG.getMachineFunction().getFunction();
-
DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
Fn,
"Support for dynamic alloca introduced in PTX ISA version 7.3 and "
@@ -1785,25 +1782,7 @@ SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getMergeValues(Ops, SDLoc());
}
- SDLoc DL(Op.getNode());
- SDValue Chain = Op.getOperand(0);
- SDValue Size = Op.getOperand(1);
- uint64_t Align = Op.getConstantOperandVal(2);
-
- // The alignment on a ISD::DYNAMIC_STACKALLOC node may be 0 to indicate that
- // the default stack alignment should be used.
- if (Align == 0)
- Align = DAG.getSubtarget().getFrameLowering()->getStackAlign().value();
-
- // The size for ptx alloca instruction is 64-bit for m64 and 32-bit for m32.
- const MVT LocalVT = getPointerTy(DAG.getDataLayout(), ADDRESS_SPACE_LOCAL);
-
- SDValue Alloc =
- DAG.getNode(NVPTXISD::DYNAMIC_STACKALLOC, DL, {LocalVT, MVT::Other},
- {Chain, DAG.getZExtOrTrunc(Size, DL, LocalVT),
- DAG.getTargetConstant(Align, DL, MVT::i32)});
-
- return Alloc;
+ return Op;
}
SDValue NVPTXTargetLowering::LowerSTACKRESTORE(SDValue Op,
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 27f099e220976..f7da803bb9fcc 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -69,7 +69,6 @@ enum NodeType : unsigned {
FMAXIMUM3,
FMINIMUM3,
- DYNAMIC_STACKALLOC,
STACKRESTORE,
STACKSAVE,
BrxStart,
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 7b135098bd4c1..43f52e72f8d5b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2253,22 +2253,31 @@ def trapexitinst : NVPTXInst<(outs), (ins), "trap; exit;", [(trap)]>, Requires<[
// brkpt instruction
def debugtrapinst : BasicNVPTXInst<(outs), (ins), "brkpt", [(debugtrap)]>;
-def SDTDynAllocaOp :
- SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<1>, SDTCisVT<2, i32>]>;
+def SDTDynAllocaOp
+ : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<1>, SDTCisVT<2, i32>]>;
-def dyn_alloca :
- SDNode<"NVPTXISD::DYNAMIC_STACKALLOC", SDTDynAllocaOp,
- [SDNPHasChain, SDNPSideEffect]>;
+def getAllocaAlign : SDNodeXForm<imm, [{
+ if (N->getZExtValue() != 0)
+ return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), N->getValueType(0));
+ return CurDAG->getTargetConstant(CurDAG->getSubtarget().getFrameLowering()->getStackAlign().value(), SDLoc(N), N->getValueType(0));
+}]>;
-foreach t = [I32RT, I64RT] in {
- def DYNAMIC_STACKALLOC # t.Size :
- BasicNVPTXInst<(outs t.RC:$ptr),
- (ins t.RC:$size, i32imm:$align),
- "alloca.u" # t.Size,
- [(set t.Ty:$ptr, (dyn_alloca t.Ty:$size, timm:$align))]>,
- Requires<[hasPTX<73>, hasSM<52>]>;
+def dyn_alloca : SDNode<"ISD::DYNAMIC_STACKALLOC",
+ SDTDynAllocaOp, [SDNPHasChain, SDNPSideEffect]>;
+
+let Predicates = [hasPTX<73>, hasSM<52>] in {
+ foreach t = [I32RT, I64RT] in {
+ def DYNAMIC_STACKALLOC_#t.Size
+ : BasicNVPTXInst<(outs t.RC:$ptr), (ins t.RC:$size, i32imm:$align),
+ "alloca.u"#t.Size>;
+ }
}
+def : Pat<(i32(dyn_alloca i32:$size, imm:$align)),
+ (DYNAMIC_STACKALLOC_32 $size, (getAllocaAlign imm:$align))>;
+def : Pat<(i64(dyn_alloca i64:$size, imm:$align)),
+ (DYNAMIC_STACKALLOC_64 $size, (getAllocaAlign imm:$align))>;
+
//
// BRX
//
>From 344de623ebef575c7f9b16e59e8a2550483f019d Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Wed, 27 Aug 2025 12:11:40 +0000
Subject: [PATCH 11/14] Update NVPTXPeephole to eliminate cvta.locals
---
llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 3 +
llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp | 42 ++++-
llvm/lib/Target/NVPTX/NVPTXPeephole.cpp | 130 ++++++++++-----
.../CodeGen/NVPTX/call-with-alloca-buffer.ll | 5 +-
llvm/test/CodeGen/NVPTX/f32x2-instructions.ll | 2 +
llvm/test/CodeGen/NVPTX/indirect_byval.ll | 30 ++--
llvm/test/CodeGen/NVPTX/local-stack-frame.ll | 50 +++---
.../CodeGen/NVPTX/lower-args-gridconstant.ll | 13 +-
llvm/test/CodeGen/NVPTX/lower-byval-args.ll | 151 ++++++++++--------
llvm/test/CodeGen/NVPTX/variadics-backend.ll | 37 +++--
.../DebugInfo/NVPTX/dbg-declare-alloca.ll | 2 +-
11 files changed, 285 insertions(+), 180 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 6981e664f2647..1fb493175e31c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1485,6 +1485,9 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
if (NumBytes) {
O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
<< DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n"
+ << "\t.reg .b"
+ << MF.getTarget().getPointerSizeInBits(ADDRESS_SPACE_GENERIC)
+ << " \t%SP;\n"
<< "\t.reg .b" << MF.getTarget().getPointerSizeInBits(ADDRESS_SPACE_LOCAL)
<< " \t%SPL;\n";
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index a98ee64f34eeb..ccb475d8efc63 100644
--- a/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -46,17 +46,47 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
// Emits
// mov %SPL, %depot;
+ // cvta.local %SP, %SPL;
// for local address accesses in MF.
+ // if the generic and local address spaces are different,
+ // it emits:
+ // mov %SPL, %depot;
+ // cvt.u64.u32 %SP, %SPL;
+ // cvta.local %SP, %SP;
+
+ if (MR.use_empty(NRI->getFrameLocalRegister(MF)))
+ // If %SPL is not used, do not bother emitting anything
+ return;
bool IsLocal64Bit =
MF.getTarget().getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8;
+ bool IsGeneric64Bit =
+ MF.getTarget().getPointerSize(NVPTXAS::ADDRESS_SPACE_GENERIC) == 8;
+ bool NeedsCast = IsGeneric64Bit != IsLocal64Bit;
+ Register SourceReg = NRI->getFrameLocalRegister(MF);
+ if (NeedsCast)
+ SourceReg = NRI->getFrameRegister(MF);
+
+ unsigned CvtaLocalOpcode =
+ (IsGeneric64Bit ? NVPTX::cvta_local_64 : NVPTX::cvta_local);
+
+ MBBI = BuildMI(MBB, MBBI, dl,
+ MF.getSubtarget().getInstrInfo()->get(CvtaLocalOpcode),
+ NRI->getFrameRegister(MF))
+ .addReg(SourceReg);
+
+ if (NeedsCast)
+ MBBI = BuildMI(MBB, MBBI, dl,
+ MF.getSubtarget().getInstrInfo()->get(NVPTX::CVT_u64_u32),
+ NRI->getFrameRegister(MF))
+ .addReg(NRI->getFrameLocalRegister(MF))
+ .addImm(NVPTX::PTXCvtMode::NONE);
+
unsigned MovDepotOpcode =
(IsLocal64Bit ? NVPTX::MOV_DEPOT_ADDR_64 : NVPTX::MOV_DEPOT_ADDR);
- if (!MR.use_empty(NRI->getFrameLocalRegister(MF))) {
- BuildMI(MBB, MBBI, dl,
- MF.getSubtarget().getInstrInfo()->get(MovDepotOpcode),
- NRI->getFrameLocalRegister(MF))
- .addImm(MF.getFunctionNumber());
- }
+ BuildMI(MBB, MBBI, dl,
+ MF.getSubtarget().getInstrInfo()->get(MovDepotOpcode),
+ NRI->getFrameLocalRegister(MF))
+ .addImm(MF.getFunctionNumber());
}
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp b/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
index e9b0aaeca4964..9223712537fd2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -12,22 +12,25 @@
// mov %SPL, %depot
// cvta.local %SP, %SPL
//
-// Because Frame Index is a generic address and alloca can only return generic
-// pointer, without this pass the instructions producing alloca'ed address will
-// be based on %SP. NVPTXLowerAlloca tends to help replace store and load on
-// this address with their .local versions, but this may introduce a lot of
-// cvta.to.local instructions. Performance can be improved if we avoid casting
-// address back and forth and directly calculate local address based on %SPL.
+// Allocas are local addresses, if multiple alloca addresses need to be
+// converted to generic ones, then multiple cvta.local instructions will be
+// emitted. To eliminate these redundant cvta.local instructions, we need to
+// combine them into a single cvta.local instruction.
+//
// This peephole pass optimizes these cases, for example
//
// It will transform the following pattern
-// %0 = LEA_ADDRi64 %VRFrame64, 4
-// %1 = cvta_to_local_64 %0
+// %0 = LEA_ADDRi64 %VRFrameLocal64, 0
+// %1 = LEA_ADDRi64 %VRFrameLocal64, 4
+// %2 = cvta_to_local_64 %0
+// %3 = cvta_to_local_64 %1
//
// into
-// %1 = LEA_ADDRi64 %VRFrameLocal64, 4
+// %0 = LEA_ADDRi64 %VRFrame64, 0
+// %1 = LEA_ADDRi64 %VRFrame64, 4
//
// %VRFrameLocal64 is the virtual register name of %SPL
+// %VRFrame64 is the virtual register name of %SP
//
//===----------------------------------------------------------------------===//
@@ -66,25 +69,36 @@ char NVPTXPeephole::ID = 0;
INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
-static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
+static bool isCVTALocalCombinationCandidate(MachineInstr &Root) {
auto &MBB = *Root.getParent();
auto &MF = *MBB.getParent();
- // Check current instruction is cvta.to.local
- if (Root.getOpcode() != NVPTX::cvta_to_local_64 &&
- Root.getOpcode() != NVPTX::cvta_to_local)
+ // Check current instruction is cvta.local
+ if (Root.getOpcode() != NVPTX::cvta_local_64 &&
+ Root.getOpcode() != NVPTX::cvta_local)
return false;
auto &Op = Root.getOperand(1);
const auto &MRI = MF.getRegInfo();
- MachineInstr *GenericAddrDef = nullptr;
+ MachineInstr *LocalAddrDef = nullptr;
if (Op.isReg() && Op.getReg().isVirtual()) {
- GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
+ LocalAddrDef = MRI.getUniqueVRegDef(Op.getReg());
+ }
+
+ if (!LocalAddrDef || LocalAddrDef->getParent() != &MBB)
+ return false;
+
+ // With -nvptx-short-ptr there's an extra cvta.u64.u32 instruction
+ // between the LEA_ADDRi and the cvta.local.
+ if (LocalAddrDef->getOpcode() == NVPTX::CVT_u64_u32) {
+ auto &Op = LocalAddrDef->getOperand(1);
+ if (Op.isReg() && Op.getReg().isVirtual())
+ LocalAddrDef = MRI.getUniqueVRegDef(Op.getReg());
}
// Check the register operand is uniquely defined by LEA_ADDRi instruction
- if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
- (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
- GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
+ if (!LocalAddrDef || LocalAddrDef->getParent() != &MBB ||
+ (LocalAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
+ LocalAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
return false;
}
@@ -92,37 +106,60 @@ static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
// Check the LEA_ADDRi operand is Frame index
- auto &BaseAddrOp = GenericAddrDef->getOperand(1);
- if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NRI->getFrameRegister(MF)) {
+ auto &BaseAddrOp = LocalAddrDef->getOperand(1);
+ if (BaseAddrOp.isReg() &&
+ BaseAddrOp.getReg() == NRI->getFrameLocalRegister(MF)) {
return true;
}
return false;
}
-static void CombineCVTAToLocal(MachineInstr &Root) {
- auto &MBB = *Root.getParent();
+static void CombineCVTALocal(MachineInstr &CVTALocalInstr) {
+ auto &MBB = *CVTALocalInstr.getParent();
auto &MF = *MBB.getParent();
const auto &MRI = MF.getRegInfo();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
- auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+ auto *LeaInstr = MRI.getUniqueVRegDef(CVTALocalInstr.getOperand(1).getReg());
+ MachineInstr *CVTInstr = nullptr;
+ if (LeaInstr->getOpcode() == NVPTX::CVT_u64_u32) {
+ CVTInstr = LeaInstr;
+ LeaInstr = MRI.getUniqueVRegDef(LeaInstr->getOperand(1).getReg());
+ assert((LeaInstr->getOpcode() == NVPTX::LEA_ADDRi64 ||
+ LeaInstr->getOpcode() == NVPTX::LEA_ADDRi) &&
+ "Expected LEA_ADDRi64 or LEA_ADDRi");
+ }
const NVPTXRegisterInfo *NRI =
MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
MachineInstrBuilder MIB =
- BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
- Root.getOperand(0).getReg())
- .addReg(NRI->getFrameLocalRegister(MF))
- .add(Prev.getOperand(2));
-
- MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
-
- // Check if MRI has only one non dbg use, which is Root
- if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
- Prev.eraseFromParent();
+ BuildMI(MF, CVTALocalInstr.getDebugLoc(), TII->get(LeaInstr->getOpcode()),
+ CVTALocalInstr.getOperand(0).getReg())
+ .addReg(NRI->getFrameRegister(MF))
+ .add(LeaInstr->getOperand(2));
+
+ MBB.insert((MachineBasicBlock::iterator)&CVTALocalInstr, MIB);
+
+ // Check if we can erase the cvta.u64.u32 or LEA_ADDRi instructions
+ if (CVTInstr) {
+ // Check if the cvta.u64.u32 instruction has only one non dbg use
+ // which is the cvta.local instruction.
+ if (MRI.hasOneNonDBGUse(CVTInstr->getOperand(0).getReg()))
+ CVTInstr->eraseFromParent();
+
+ // Check if the LEA_ADDRi instruction has no other non dbg uses
+ // (i.e. cvta.u64.u32 was the only non dbg use)
+ if (MRI.use_nodbg_empty(CVTInstr->getOperand(0).getReg()))
+ CVTInstr->eraseFromParent();
+
+ } else if (MRI.hasOneNonDBGUse(LeaInstr->getOperand(0).getReg())) {
+ // Check if the LEA_ADDRi instruction has only one non dbg use
+ // which is the cvta.u64.u32 instruction.
+ LeaInstr->eraseFromParent();
}
- Root.eraseFromParent();
+
+ CVTALocalInstr.eraseFromParent();
}
bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
@@ -137,8 +174,8 @@ bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
while (BlockIter != MBB.end()) {
auto &MI = *BlockIter++;
- if (isCVTAToLocalCombinationCandidate(MI)) {
- CombineCVTAToLocal(MI);
+ if (isCVTALocalCombinationCandidate(MI)) {
+ CombineCVTALocal(MI);
Changed = true;
}
} // Instruction
@@ -147,13 +184,24 @@ bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
const NVPTXRegisterInfo *NRI =
MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();
- // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
const auto &MRI = MF.getRegInfo();
- if (MRI.use_empty(NRI->getFrameRegister(MF))) {
- if (auto MI = MRI.getUniqueVRegDef(NRI->getFrameRegister(MF))) {
+
+ // Remove unnecessary %VRFrame = cvta.local %VRFrame
+ if (MRI.hasOneNonDBGUse(NRI->getFrameRegister(MF)))
+ if (auto *MI = MRI.getOneNonDBGUser(NRI->getFrameRegister(MF)))
+ if (MI->getOpcode() == NVPTX::cvta_local_64 ||
+ MI->getOpcode() == NVPTX::cvta_local)
+ MI->eraseFromParent();
+
+ // Remove unnecessary %VRFrame = cvt.u64.u32 %VRFrameLocal
+ if (MRI.use_empty(NRI->getFrameRegister(MF)))
+ if (auto *MI = MRI.getUniqueVRegDef(NRI->getFrameRegister(MF)))
+ MI->eraseFromParent();
+
+ // Remove unnecessary %VRFrameLocal = LEA_ADDRi %depot
+ if (MRI.use_empty(NRI->getFrameLocalRegister(MF)))
+ if (auto *MI = MRI.getUniqueVRegDef(NRI->getFrameLocalRegister(MF)))
MI->eraseFromParent();
- }
- }
return Changed;
}
diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index e825f8ef19949..eee600a8c98fe 100644
--- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -25,8 +25,7 @@ entry:
; CHECK: ld.param.b64 %rd[[A_REG:[0-9]+]], [kernel_func_param_0]
; CHECK: cvta.to.global.u64 %rd[[A1_REG:[0-9]+]], %rd[[A_REG]]
-; CHECK: add.u64 %rd[[SP_REG0:[0-9]+]], %SPL, 0
-; CHECK: cvta.local.u64 %rd[[SP_REG:[0-9]+]], %rd[[SP_REG0]];
+; CHECK: add.u64 %rd[[SP_REG0:[0-9]+]], %SP, 0
; CHECK: ld.global.b32 %r[[A0_REG:[0-9]+]], [%rd[[A1_REG]]]
; CHECK: st.local.b32 [%SPL], %r[[A0_REG]]
@@ -48,7 +47,7 @@ entry:
; CHECK-DAG: .param .b64 param0;
; CHECK-DAG: .param .b64 param1;
; CHECK-DAG: st.param.b64 [param0], %rd[[A_REG]]
-; CHECK-DAG: st.param.b64 [param1], %rd[[SP_REG]]
+; CHECK-DAG: st.param.b64 [param1], %rd[[SP_REG0]]
; CHECK: call.uni callee,
call void @callee(ptr %a, ptr %buf) #2
diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
index 9b870c731cd0d..d07755a7aa85b 100644
--- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
@@ -79,12 +79,14 @@ define float @test_extract_i(<2 x float> %a, i64 %idx) #0 {
; CHECK-NOF32X2-LABEL: test_extract_i(
; CHECK-NOF32X2: {
; CHECK-NOF32X2-NEXT: .local .align 8 .b8 __local_depot3[8];
+; CHECK-NOF32X2-NEXT: .reg .b64 %SP;
; CHECK-NOF32X2-NEXT: .reg .b64 %SPL;
; CHECK-NOF32X2-NEXT: .reg .b32 %r<4>;
; CHECK-NOF32X2-NEXT: .reg .b64 %rd<6>;
; CHECK-NOF32X2-EMPTY:
; CHECK-NOF32X2-NEXT: // %bb.0:
; CHECK-NOF32X2-NEXT: mov.b64 %SPL, __local_depot3;
+; CHECK-NOF32X2-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_extract_i_param_0];
; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1];
; CHECK-NOF32X2-NEXT: st.local.v2.b32 [%SPL], {%r1, %r2};
diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
index a27706854222d..c524adad3c262 100644
--- a/llvm/test/CodeGen/NVPTX/indirect_byval.ll
+++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
@@ -13,25 +13,26 @@ define internal i32 @foo() {
; CHECK-LABEL: foo(
; CHECK: {
; CHECK-NEXT: .local .align 1 .b8 __local_depot0[2];
+; CHECK-NEXT: .reg .b64 %SP;
; CHECK-NEXT: .reg .b64 %SPL;
; CHECK-NEXT: .reg .b16 %rs<2>;
; CHECK-NEXT: .reg .b32 %r<2>;
-; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: mov.b64 %SPL, __local_depot0;
-; CHECK-NEXT: add.u64 %rd1, %SPL, 0;
-; CHECK-NEXT: cvta.local.u64 %rd2, %rd1;
-; CHECK-NEXT: ld.global.b64 %rd3, [ptr];
+; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
+; CHECK-NEXT: add.u64 %rd1, %SP, 0;
+; CHECK-NEXT: ld.global.b64 %rd2, [ptr];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 1 .b8 param0[1];
; CHECK-NEXT: .param .b64 param1;
; CHECK-NEXT: .param .b32 retval0;
-; CHECK-NEXT: st.param.b64 [param1], %rd2;
+; CHECK-NEXT: st.param.b64 [param1], %rd1;
; CHECK-NEXT: ld.local.b8 %rs1, [%SPL+1];
; CHECK-NEXT: st.param.b8 [param0], %rs1;
; CHECK-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _);
-; CHECK-NEXT: call (retval0), %rd3, (param0, param1), prototype_0;
+; CHECK-NEXT: call (retval0), %rd2, (param0, param1), prototype_0;
; CHECK-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
@@ -49,24 +50,25 @@ define internal i32 @bar() {
; CHECK: // @bar
; CHECK-NEXT: {
; CHECK-NEXT: .local .align 8 .b8 __local_depot1[16];
+; CHECK-NEXT: .reg .b64 %SP;
; CHECK-NEXT: .reg .b64 %SPL;
; CHECK-NEXT: .reg .b32 %r<2>;
-; CHECK-NEXT: .reg .b64 %rd<5>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: mov.b64 %SPL, __local_depot1;
-; CHECK-NEXT: add.u64 %rd1, %SPL, 0;
-; CHECK-NEXT: cvta.local.u64 %rd2, %rd1;
-; CHECK-NEXT: ld.global.b64 %rd3, [ptr];
+; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
+; CHECK-NEXT: add.u64 %rd1, %SP, 0;
+; CHECK-NEXT: ld.global.b64 %rd2, [ptr];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: .param .b64 param1;
; CHECK-NEXT: .param .b32 retval0;
-; CHECK-NEXT: st.param.b64 [param1], %rd2;
-; CHECK-NEXT: ld.local.b64 %rd4, [%SPL+8];
-; CHECK-NEXT: st.param.b64 [param0], %rd4;
+; CHECK-NEXT: st.param.b64 [param1], %rd1;
+; CHECK-NEXT: ld.local.b64 %rd3, [%SPL+8];
+; CHECK-NEXT: st.param.b64 [param0], %rd3;
; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _);
-; CHECK-NEXT: call (retval0), %rd3, (param0, param1), prototype_1;
+; CHECK-NEXT: call (retval0), %rd2, (param0, param1), prototype_1;
; CHECK-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
index 35bfde0dc2202..591107404fe7a 100644
--- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -10,6 +10,7 @@ define void @foo(i32 %a) {
; PTX32-LABEL: foo(
; PTX32: {
; PTX32-NEXT: .local .align 4 .b8 __local_depot0[4];
+; PTX32-NEXT: .reg .b32 %SP;
; PTX32-NEXT: .reg .b32 %SPL;
; PTX32-NEXT: .reg .b32 %r<2>;
; PTX32-EMPTY:
@@ -22,6 +23,7 @@ define void @foo(i32 %a) {
; PTX64-LABEL: foo(
; PTX64: {
; PTX64-NEXT: .local .align 4 .b8 __local_depot0[4];
+; PTX64-NEXT: .reg .b64 %SP;
; PTX64-NEXT: .reg .b64 %SPL;
; PTX64-NEXT: .reg .b32 %r<2>;
; PTX64-EMPTY:
@@ -39,18 +41,19 @@ define ptx_kernel void @foo2(i32 %a) {
; PTX32-LABEL: foo2(
; PTX32: {
; PTX32-NEXT: .local .align 4 .b8 __local_depot1[4];
+; PTX32-NEXT: .reg .b32 %SP;
; PTX32-NEXT: .reg .b32 %SPL;
-; PTX32-NEXT: .reg .b32 %r<4>;
+; PTX32-NEXT: .reg .b32 %r<3>;
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
; PTX32-NEXT: mov.b32 %SPL, __local_depot1;
+; PTX32-NEXT: cvta.local.u32 %SP, %SPL;
; PTX32-NEXT: ld.param.b32 %r1, [foo2_param_0];
-; PTX32-NEXT: add.u32 %r2, %SPL, 0;
-; PTX32-NEXT: cvta.local.u32 %r3, %r2;
+; PTX32-NEXT: add.u32 %r2, %SP, 0;
; PTX32-NEXT: st.local.b32 [%SPL], %r1;
; PTX32-NEXT: { // callseq 0, 0
; PTX32-NEXT: .param .b32 param0;
-; PTX32-NEXT: st.param.b32 [param0], %r3;
+; PTX32-NEXT: st.param.b32 [param0], %r2;
; PTX32-NEXT: call.uni bar, (param0);
; PTX32-NEXT: } // callseq 0
; PTX32-NEXT: ret;
@@ -58,19 +61,20 @@ define ptx_kernel void @foo2(i32 %a) {
; PTX64-LABEL: foo2(
; PTX64: {
; PTX64-NEXT: .local .align 4 .b8 __local_depot1[4];
+; PTX64-NEXT: .reg .b64 %SP;
; PTX64-NEXT: .reg .b64 %SPL;
; PTX64-NEXT: .reg .b32 %r<2>;
-; PTX64-NEXT: .reg .b64 %rd<3>;
+; PTX64-NEXT: .reg .b64 %rd<2>;
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
; PTX64-NEXT: mov.b64 %SPL, __local_depot1;
+; PTX64-NEXT: cvta.local.u64 %SP, %SPL;
; PTX64-NEXT: ld.param.b32 %r1, [foo2_param_0];
-; PTX64-NEXT: add.u64 %rd1, %SPL, 0;
-; PTX64-NEXT: cvta.local.u64 %rd2, %rd1;
+; PTX64-NEXT: add.u64 %rd1, %SP, 0;
; PTX64-NEXT: st.local.b32 [%SPL], %r1;
; PTX64-NEXT: { // callseq 0, 0
; PTX64-NEXT: .param .b64 param0;
-; PTX64-NEXT: st.param.b64 [param0], %rd2;
+; PTX64-NEXT: st.param.b64 [param0], %rd1;
; PTX64-NEXT: call.uni bar, (param0);
; PTX64-NEXT: } // callseq 0
; PTX64-NEXT: ret;
@@ -86,6 +90,7 @@ define void @foo3(i32 %a) {
; PTX32-LABEL: foo3(
; PTX32: {
; PTX32-NEXT: .local .align 4 .b8 __local_depot2[12];
+; PTX32-NEXT: .reg .b32 %SP;
; PTX32-NEXT: .reg .b32 %SPL;
; PTX32-NEXT: .reg .b32 %r<5>;
; PTX32-EMPTY:
@@ -101,6 +106,7 @@ define void @foo3(i32 %a) {
; PTX64-LABEL: foo3(
; PTX64: {
; PTX64-NEXT: .local .align 4 .b8 __local_depot2[12];
+; PTX64-NEXT: .reg .b64 %SP;
; PTX64-NEXT: .reg .b64 %SPL;
; PTX64-NEXT: .reg .b32 %r<2>;
; PTX64-NEXT: .reg .b64 %rd<3>;
@@ -122,25 +128,25 @@ define void @foo4() {
; PTX32-LABEL: foo4(
; PTX32: {
; PTX32-NEXT: .local .align 4 .b8 __local_depot3[8];
+; PTX32-NEXT: .reg .b32 %SP;
; PTX32-NEXT: .reg .b32 %SPL;
-; PTX32-NEXT: .reg .b32 %r<5>;
+; PTX32-NEXT: .reg .b32 %r<3>;
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
; PTX32-NEXT: mov.b32 %SPL, __local_depot3;
-; PTX32-NEXT: add.u32 %r1, %SPL, 0;
-; PTX32-NEXT: cvta.local.u32 %r2, %r1;
-; PTX32-NEXT: add.u32 %r3, %SPL, 4;
-; PTX32-NEXT: cvta.local.u32 %r4, %r3;
+; PTX32-NEXT: cvta.local.u32 %SP, %SPL;
+; PTX32-NEXT: add.u32 %r1, %SP, 0;
+; PTX32-NEXT: add.u32 %r2, %SP, 4;
; PTX32-NEXT: st.local.b32 [%SPL], 0;
; PTX32-NEXT: st.local.b32 [%SPL+4], 0;
; PTX32-NEXT: { // callseq 1, 0
; PTX32-NEXT: .param .b32 param0;
-; PTX32-NEXT: st.param.b32 [param0], %r2;
+; PTX32-NEXT: st.param.b32 [param0], %r1;
; PTX32-NEXT: call.uni bar, (param0);
; PTX32-NEXT: } // callseq 1
; PTX32-NEXT: { // callseq 2, 0
; PTX32-NEXT: .param .b32 param0;
-; PTX32-NEXT: st.param.b32 [param0], %r4;
+; PTX32-NEXT: st.param.b32 [param0], %r2;
; PTX32-NEXT: call.uni bar, (param0);
; PTX32-NEXT: } // callseq 2
; PTX32-NEXT: ret;
@@ -148,25 +154,25 @@ define void @foo4() {
; PTX64-LABEL: foo4(
; PTX64: {
; PTX64-NEXT: .local .align 4 .b8 __local_depot3[8];
+; PTX64-NEXT: .reg .b64 %SP;
; PTX64-NEXT: .reg .b64 %SPL;
-; PTX64-NEXT: .reg .b64 %rd<5>;
+; PTX64-NEXT: .reg .b64 %rd<3>;
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
; PTX64-NEXT: mov.b64 %SPL, __local_depot3;
-; PTX64-NEXT: add.u64 %rd1, %SPL, 0;
-; PTX64-NEXT: cvta.local.u64 %rd2, %rd1;
-; PTX64-NEXT: add.u64 %rd3, %SPL, 4;
-; PTX64-NEXT: cvta.local.u64 %rd4, %rd3;
+; PTX64-NEXT: cvta.local.u64 %SP, %SPL;
+; PTX64-NEXT: add.u64 %rd1, %SP, 0;
+; PTX64-NEXT: add.u64 %rd2, %SP, 4;
; PTX64-NEXT: st.local.b32 [%SPL], 0;
; PTX64-NEXT: st.local.b32 [%SPL+4], 0;
; PTX64-NEXT: { // callseq 1, 0
; PTX64-NEXT: .param .b64 param0;
-; PTX64-NEXT: st.param.b64 [param0], %rd2;
+; PTX64-NEXT: st.param.b64 [param0], %rd1;
; PTX64-NEXT: call.uni bar, (param0);
; PTX64-NEXT: } // callseq 1
; PTX64-NEXT: { // callseq 2, 0
; PTX64-NEXT: .param .b64 param0;
-; PTX64-NEXT: st.param.b64 [param0], %rd4;
+; PTX64-NEXT: st.param.b64 [param0], %rd2;
; PTX64-NEXT: call.uni bar, (param0);
; PTX64-NEXT: } // callseq 2
; PTX64-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 2cfca3f6b3174..a8920086d138a 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -149,19 +149,20 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-LABEL: multiple_grid_const_escape(
; PTX: {
; PTX-NEXT: .local .align 4 .b8 __local_depot4[4];
+; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .b32 %r<2>;
-; PTX-NEXT: .reg .b64 %rd<8>;
+; PTX-NEXT: .reg .b64 %rd<7>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
; PTX-NEXT: mov.b64 %SPL, __local_depot4;
+; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: mov.b64 %rd1, multiple_grid_const_escape_param_0;
; PTX-NEXT: ld.param.b32 %r1, [multiple_grid_const_escape_param_1];
; PTX-NEXT: mov.b64 %rd2, multiple_grid_const_escape_param_2;
; PTX-NEXT: cvta.param.u64 %rd3, %rd2;
; PTX-NEXT: cvta.param.u64 %rd4, %rd1;
-; PTX-NEXT: add.u64 %rd5, %SPL, 0;
-; PTX-NEXT: cvta.local.u64 %rd6, %rd5;
+; PTX-NEXT: add.u64 %rd5, %SP, 0;
; PTX-NEXT: st.local.b32 [%SPL], %r1;
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
@@ -169,11 +170,11 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: .param .b64 param2;
; PTX-NEXT: .param .b32 retval0;
; PTX-NEXT: st.param.b64 [param2], %rd3;
-; PTX-NEXT: st.param.b64 [param1], %rd6;
+; PTX-NEXT: st.param.b64 [param1], %rd5;
; PTX-NEXT: st.param.b64 [param0], %rd4;
; PTX-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _);
-; PTX-NEXT: mov.b64 %rd7, escape3;
-; PTX-NEXT: call (retval0), %rd7, (param0, param1, param2), prototype_1;
+; PTX-NEXT: mov.b64 %rd6, escape3;
+; PTX-NEXT: call (retval0), %rd6, (param0, param1, param2), prototype_1;
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape(
diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index 246eaf8a55538..ed6657ae90549 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -136,21 +136,22 @@ define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out
; PTX-LABEL: escape_ptr(
; PTX: {
; PTX-NEXT: .local .align 8 .b8 __local_depot2[8];
+; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
-; PTX-NEXT: .reg .b64 %rd<7>;
+; PTX-NEXT: .reg .b64 %rd<6>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.b64 %SPL, __local_depot2;
-; PTX-NEXT: add.u64 %rd1, %SPL, 0;
-; PTX-NEXT: cvta.local.u64 %rd2, %rd1;
-; PTX-NEXT: ld.param.b32 %rd3, [escape_ptr_param_1+4];
-; PTX-NEXT: shl.b64 %rd4, %rd3, 32;
-; PTX-NEXT: ld.param.b32 %rd5, [escape_ptr_param_1];
-; PTX-NEXT: or.b64 %rd6, %rd4, %rd5;
-; PTX-NEXT: st.local.b64 [%SPL], %rd6;
+; PTX-NEXT: cvta.local.u64 %SP, %SPL;
+; PTX-NEXT: add.u64 %rd1, %SP, 0;
+; PTX-NEXT: ld.param.b32 %rd2, [escape_ptr_param_1+4];
+; PTX-NEXT: shl.b64 %rd3, %rd2, 32;
+; PTX-NEXT: ld.param.b32 %rd4, [escape_ptr_param_1];
+; PTX-NEXT: or.b64 %rd5, %rd3, %rd4;
+; PTX-NEXT: st.local.b64 [%SPL], %rd5;
; PTX-NEXT: { // callseq 0, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd2;
+; PTX-NEXT: st.param.b64 [param0], %rd1;
; PTX-NEXT: call.uni _Z6escapePv, (param0);
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
@@ -174,22 +175,23 @@ define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone
; PTX-LABEL: escape_ptr_gep(
; PTX: {
; PTX-NEXT: .local .align 8 .b8 __local_depot3[8];
+; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
-; PTX-NEXT: .reg .b64 %rd<8>;
+; PTX-NEXT: .reg .b64 %rd<7>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.b64 %SPL, __local_depot3;
-; PTX-NEXT: add.u64 %rd1, %SPL, 0;
-; PTX-NEXT: cvta.local.u64 %rd2, %rd1;
-; PTX-NEXT: ld.param.b32 %rd3, [escape_ptr_gep_param_1+4];
-; PTX-NEXT: shl.b64 %rd4, %rd3, 32;
-; PTX-NEXT: ld.param.b32 %rd5, [escape_ptr_gep_param_1];
-; PTX-NEXT: or.b64 %rd6, %rd4, %rd5;
-; PTX-NEXT: st.local.b64 [%SPL], %rd6;
-; PTX-NEXT: add.s64 %rd7, %rd2, 4;
+; PTX-NEXT: cvta.local.u64 %SP, %SPL;
+; PTX-NEXT: add.u64 %rd1, %SP, 0;
+; PTX-NEXT: ld.param.b32 %rd2, [escape_ptr_gep_param_1+4];
+; PTX-NEXT: shl.b64 %rd3, %rd2, 32;
+; PTX-NEXT: ld.param.b32 %rd4, [escape_ptr_gep_param_1];
+; PTX-NEXT: or.b64 %rd5, %rd3, %rd4;
+; PTX-NEXT: st.local.b64 [%SPL], %rd5;
+; PTX-NEXT: add.s64 %rd6, %rd1, 4;
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd7;
+; PTX-NEXT: st.param.b64 [param0], %rd6;
; PTX-NEXT: call.uni _Z6escapePv, (param0);
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
@@ -213,21 +215,22 @@ define dso_local ptx_kernel void @escape_ptr_store(ptr nocapture noundef writeon
; PTX-LABEL: escape_ptr_store(
; PTX: {
; PTX-NEXT: .local .align 8 .b8 __local_depot4[8];
+; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
-; PTX-NEXT: .reg .b64 %rd<9>;
+; PTX-NEXT: .reg .b64 %rd<8>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.b64 %SPL, __local_depot4;
+; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: ld.param.b64 %rd1, [escape_ptr_store_param_0];
; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
-; PTX-NEXT: add.u64 %rd3, %SPL, 0;
-; PTX-NEXT: cvta.local.u64 %rd4, %rd3;
-; PTX-NEXT: ld.param.b32 %rd5, [escape_ptr_store_param_1+4];
-; PTX-NEXT: shl.b64 %rd6, %rd5, 32;
-; PTX-NEXT: ld.param.b32 %rd7, [escape_ptr_store_param_1];
-; PTX-NEXT: or.b64 %rd8, %rd6, %rd7;
-; PTX-NEXT: st.local.b64 [%SPL], %rd8;
-; PTX-NEXT: st.global.b64 [%rd2], %rd4;
+; PTX-NEXT: add.u64 %rd3, %SP, 0;
+; PTX-NEXT: ld.param.b32 %rd4, [escape_ptr_store_param_1+4];
+; PTX-NEXT: shl.b64 %rd5, %rd4, 32;
+; PTX-NEXT: ld.param.b32 %rd6, [escape_ptr_store_param_1];
+; PTX-NEXT: or.b64 %rd7, %rd5, %rd6;
+; PTX-NEXT: st.local.b64 [%SPL], %rd7;
+; PTX-NEXT: st.global.b64 [%rd2], %rd3;
; PTX-NEXT: ret;
entry:
store ptr %s, ptr %out, align 8
@@ -249,22 +252,23 @@ define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef wri
; PTX-LABEL: escape_ptr_gep_store(
; PTX: {
; PTX-NEXT: .local .align 8 .b8 __local_depot5[8];
+; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
-; PTX-NEXT: .reg .b64 %rd<10>;
+; PTX-NEXT: .reg .b64 %rd<9>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.b64 %SPL, __local_depot5;
+; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: ld.param.b64 %rd1, [escape_ptr_gep_store_param_0];
; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
-; PTX-NEXT: add.u64 %rd3, %SPL, 0;
-; PTX-NEXT: cvta.local.u64 %rd4, %rd3;
-; PTX-NEXT: ld.param.b32 %rd5, [escape_ptr_gep_store_param_1+4];
-; PTX-NEXT: shl.b64 %rd6, %rd5, 32;
-; PTX-NEXT: ld.param.b32 %rd7, [escape_ptr_gep_store_param_1];
-; PTX-NEXT: or.b64 %rd8, %rd6, %rd7;
-; PTX-NEXT: st.local.b64 [%SPL], %rd8;
-; PTX-NEXT: add.s64 %rd9, %rd4, 4;
-; PTX-NEXT: st.global.b64 [%rd2], %rd9;
+; PTX-NEXT: add.u64 %rd3, %SP, 0;
+; PTX-NEXT: ld.param.b32 %rd4, [escape_ptr_gep_store_param_1+4];
+; PTX-NEXT: shl.b64 %rd5, %rd4, 32;
+; PTX-NEXT: ld.param.b32 %rd6, [escape_ptr_gep_store_param_1];
+; PTX-NEXT: or.b64 %rd7, %rd5, %rd6;
+; PTX-NEXT: st.local.b64 [%SPL], %rd7;
+; PTX-NEXT: add.s64 %rd8, %rd3, 4;
+; PTX-NEXT: st.global.b64 [%rd2], %rd8;
; PTX-NEXT: ret;
entry:
%b = getelementptr inbounds nuw i8, ptr %s, i64 4
@@ -287,21 +291,22 @@ define dso_local ptx_kernel void @escape_ptrtoint(ptr nocapture noundef writeonl
; PTX-LABEL: escape_ptrtoint(
; PTX: {
; PTX-NEXT: .local .align 8 .b8 __local_depot6[8];
+; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
-; PTX-NEXT: .reg .b64 %rd<9>;
+; PTX-NEXT: .reg .b64 %rd<8>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.b64 %SPL, __local_depot6;
+; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: ld.param.b64 %rd1, [escape_ptrtoint_param_0];
; PTX-NEXT: cvta.to.global.u64 %rd2, %rd1;
-; PTX-NEXT: add.u64 %rd3, %SPL, 0;
-; PTX-NEXT: cvta.local.u64 %rd4, %rd3;
-; PTX-NEXT: ld.param.b32 %rd5, [escape_ptrtoint_param_1+4];
-; PTX-NEXT: shl.b64 %rd6, %rd5, 32;
-; PTX-NEXT: ld.param.b32 %rd7, [escape_ptrtoint_param_1];
-; PTX-NEXT: or.b64 %rd8, %rd6, %rd7;
-; PTX-NEXT: st.local.b64 [%SPL], %rd8;
-; PTX-NEXT: st.global.b64 [%rd2], %rd4;
+; PTX-NEXT: add.u64 %rd3, %SP, 0;
+; PTX-NEXT: ld.param.b32 %rd4, [escape_ptrtoint_param_1+4];
+; PTX-NEXT: shl.b64 %rd5, %rd4, 32;
+; PTX-NEXT: ld.param.b32 %rd6, [escape_ptrtoint_param_1];
+; PTX-NEXT: or.b64 %rd7, %rd5, %rd6;
+; PTX-NEXT: st.local.b64 [%SPL], %rd7;
+; PTX-NEXT: st.global.b64 [%rd2], %rd3;
; PTX-NEXT: ret;
entry:
%i = ptrtoint ptr %s to i64
@@ -443,52 +448,53 @@ define dso_local ptx_kernel void @memcpy_to_param(ptr nocapture noundef readonly
; PTX-LABEL: memcpy_to_param(
; PTX: {
; PTX-NEXT: .local .align 8 .b8 __local_depot9[8];
+; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .b16 %rs<17>;
-; PTX-NEXT: .reg .b64 %rd<8>;
+; PTX-NEXT: .reg .b64 %rd<7>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.b64 %SPL, __local_depot9;
+; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: ld.param.b64 %rd1, [memcpy_to_param_param_0];
-; PTX-NEXT: add.u64 %rd2, %SPL, 0;
-; PTX-NEXT: cvta.local.u64 %rd3, %rd2;
-; PTX-NEXT: ld.param.b32 %rd4, [memcpy_to_param_param_1+4];
-; PTX-NEXT: shl.b64 %rd5, %rd4, 32;
-; PTX-NEXT: ld.param.b32 %rd6, [memcpy_to_param_param_1];
-; PTX-NEXT: or.b64 %rd7, %rd5, %rd6;
-; PTX-NEXT: st.local.b64 [%SPL], %rd7;
+; PTX-NEXT: add.u64 %rd2, %SP, 0;
+; PTX-NEXT: ld.param.b32 %rd3, [memcpy_to_param_param_1+4];
+; PTX-NEXT: shl.b64 %rd4, %rd3, 32;
+; PTX-NEXT: ld.param.b32 %rd5, [memcpy_to_param_param_1];
+; PTX-NEXT: or.b64 %rd6, %rd4, %rd5;
+; PTX-NEXT: st.local.b64 [%SPL], %rd6;
; PTX-NEXT: ld.volatile.b8 %rs1, [%rd1];
-; PTX-NEXT: st.volatile.b8 [%rd3], %rs1;
+; PTX-NEXT: st.volatile.b8 [%rd2], %rs1;
; PTX-NEXT: ld.volatile.b8 %rs2, [%rd1+1];
-; PTX-NEXT: st.volatile.b8 [%rd3+1], %rs2;
+; PTX-NEXT: st.volatile.b8 [%rd2+1], %rs2;
; PTX-NEXT: ld.volatile.b8 %rs3, [%rd1+2];
-; PTX-NEXT: st.volatile.b8 [%rd3+2], %rs3;
+; PTX-NEXT: st.volatile.b8 [%rd2+2], %rs3;
; PTX-NEXT: ld.volatile.b8 %rs4, [%rd1+3];
-; PTX-NEXT: st.volatile.b8 [%rd3+3], %rs4;
+; PTX-NEXT: st.volatile.b8 [%rd2+3], %rs4;
; PTX-NEXT: ld.volatile.b8 %rs5, [%rd1+4];
-; PTX-NEXT: st.volatile.b8 [%rd3+4], %rs5;
+; PTX-NEXT: st.volatile.b8 [%rd2+4], %rs5;
; PTX-NEXT: ld.volatile.b8 %rs6, [%rd1+5];
-; PTX-NEXT: st.volatile.b8 [%rd3+5], %rs6;
+; PTX-NEXT: st.volatile.b8 [%rd2+5], %rs6;
; PTX-NEXT: ld.volatile.b8 %rs7, [%rd1+6];
-; PTX-NEXT: st.volatile.b8 [%rd3+6], %rs7;
+; PTX-NEXT: st.volatile.b8 [%rd2+6], %rs7;
; PTX-NEXT: ld.volatile.b8 %rs8, [%rd1+7];
-; PTX-NEXT: st.volatile.b8 [%rd3+7], %rs8;
+; PTX-NEXT: st.volatile.b8 [%rd2+7], %rs8;
; PTX-NEXT: ld.volatile.b8 %rs9, [%rd1+8];
-; PTX-NEXT: st.volatile.b8 [%rd3+8], %rs9;
+; PTX-NEXT: st.volatile.b8 [%rd2+8], %rs9;
; PTX-NEXT: ld.volatile.b8 %rs10, [%rd1+9];
-; PTX-NEXT: st.volatile.b8 [%rd3+9], %rs10;
+; PTX-NEXT: st.volatile.b8 [%rd2+9], %rs10;
; PTX-NEXT: ld.volatile.b8 %rs11, [%rd1+10];
-; PTX-NEXT: st.volatile.b8 [%rd3+10], %rs11;
+; PTX-NEXT: st.volatile.b8 [%rd2+10], %rs11;
; PTX-NEXT: ld.volatile.b8 %rs12, [%rd1+11];
-; PTX-NEXT: st.volatile.b8 [%rd3+11], %rs12;
+; PTX-NEXT: st.volatile.b8 [%rd2+11], %rs12;
; PTX-NEXT: ld.volatile.b8 %rs13, [%rd1+12];
-; PTX-NEXT: st.volatile.b8 [%rd3+12], %rs13;
+; PTX-NEXT: st.volatile.b8 [%rd2+12], %rs13;
; PTX-NEXT: ld.volatile.b8 %rs14, [%rd1+13];
-; PTX-NEXT: st.volatile.b8 [%rd3+13], %rs14;
+; PTX-NEXT: st.volatile.b8 [%rd2+13], %rs14;
; PTX-NEXT: ld.volatile.b8 %rs15, [%rd1+14];
-; PTX-NEXT: st.volatile.b8 [%rd3+14], %rs15;
+; PTX-NEXT: st.volatile.b8 [%rd2+14], %rs15;
; PTX-NEXT: ld.volatile.b8 %rs16, [%rd1+15];
-; PTX-NEXT: st.volatile.b8 [%rd3+15], %rs16;
+; PTX-NEXT: st.volatile.b8 [%rd2+15], %rs16;
; PTX-NEXT: ret;
entry:
tail call void @llvm.memcpy.p0.p0.i64(ptr %s, ptr %in, i64 16, i1 true)
@@ -622,6 +628,7 @@ define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr by
; PTX-LABEL: test_select_write(
; PTX: {
; PTX-NEXT: .local .align 4 .b8 __local_depot12[8];
+; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .pred %p<2>;
; PTX-NEXT: .reg .b16 %rs<3>;
@@ -799,6 +806,7 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr
; PTX-LABEL: test_phi_write(
; PTX: {
; PTX-NEXT: .local .align 4 .b8 __local_depot14[8];
+; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .pred %p<2>;
; PTX-NEXT: .reg .b16 %rs<3>;
@@ -850,6 +858,7 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
; PTX-LABEL: test_forward_byval_arg(
; PTX: {
; PTX-NEXT: .local .align 4 .b8 __local_depot15[4];
+; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .b32 %r<2>;
; PTX-EMPTY:
diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
index e17d7d0b28269..a42d938a9c4ce 100644
--- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll
+++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
@@ -102,24 +102,25 @@ define dso_local i32 @foo() {
; CHECK-PTX-LABEL: foo(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot1[40];
+; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b32 %r<2>;
-; CHECK-PTX-NEXT: .reg .b64 %rd<3>;
+; CHECK-PTX-NEXT: .reg .b64 %rd<2>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot1;
+; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: st.local.b64 [%SPL], 4294967297;
; CHECK-PTX-NEXT: st.local.b32 [%SPL+8], 1;
; CHECK-PTX-NEXT: st.local.b64 [%SPL+16], 1;
; CHECK-PTX-NEXT: st.local.b64 [%SPL+24], 4607182418800017408;
; CHECK-PTX-NEXT: st.local.b64 [%SPL+32], 4607182418800017408;
-; CHECK-PTX-NEXT: add.u64 %rd1, %SPL, 0;
-; CHECK-PTX-NEXT: cvta.local.u64 %rd2, %rd1;
+; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
; CHECK-PTX-NEXT: { // callseq 0, 0
; CHECK-PTX-NEXT: .param .b32 param0;
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: .param .b32 retval0;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd2;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: call.uni (retval0), variadics1, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
@@ -138,6 +139,7 @@ define dso_local i32 @variadics2(i32 noundef %first, ...) {
; CHECK-PTX-LABEL: variadics2(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 2 .b8 __local_depot2[4];
+; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<6>;
; CHECK-PTX-NEXT: .reg .b32 %r<6>;
@@ -197,13 +199,15 @@ define dso_local i32 @bar() {
; CHECK-PTX-LABEL: bar(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot3[24];
+; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<6>;
; CHECK-PTX-NEXT: .reg .b32 %r<2>;
-; CHECK-PTX-NEXT: .reg .b64 %rd<3>;
+; CHECK-PTX-NEXT: .reg .b64 %rd<2>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot3;
+; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: ld.global.nc.b8 %rs1, [__const_$_bar_$_s1+7];
; CHECK-PTX-NEXT: st.local.b8 [%SPL+2], %rs1;
; CHECK-PTX-NEXT: ld.global.nc.b8 %rs2, [__const_$_bar_$_s1+6];
@@ -214,13 +218,12 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: st.local.b32 [%SPL+8], 1;
; CHECK-PTX-NEXT: st.local.b8 [%SPL+12], 1;
; CHECK-PTX-NEXT: st.local.b64 [%SPL+16], 1;
-; CHECK-PTX-NEXT: add.u64 %rd1, %SPL, 8;
-; CHECK-PTX-NEXT: cvta.local.u64 %rd2, %rd1;
+; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 8;
; CHECK-PTX-NEXT: { // callseq 1, 0
; CHECK-PTX-NEXT: .param .b32 param0;
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: .param .b32 retval0;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd2;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: call.uni (retval0), variadics2, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
@@ -277,20 +280,21 @@ define dso_local i32 @baz() {
; CHECK-PTX-LABEL: baz(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 16 .b8 __local_depot5[16];
+; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b32 %r<2>;
-; CHECK-PTX-NEXT: .reg .b64 %rd<3>;
+; CHECK-PTX-NEXT: .reg .b64 %rd<2>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot5;
+; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: st.local.v4.b32 [%SPL], {1, 1, 1, 1};
-; CHECK-PTX-NEXT: add.u64 %rd1, %SPL, 0;
-; CHECK-PTX-NEXT: cvta.local.u64 %rd2, %rd1;
+; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
; CHECK-PTX-NEXT: { // callseq 2, 0
; CHECK-PTX-NEXT: .param .b32 param0;
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: .param .b32 retval0;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd2;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: call.uni (retval0), variadics3, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
@@ -342,23 +346,24 @@ define dso_local void @qux() {
; CHECK-PTX-LABEL: qux(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot7[24];
+; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
-; CHECK-PTX-NEXT: .reg .b64 %rd<5>;
+; CHECK-PTX-NEXT: .reg .b64 %rd<4>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot7;
+; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: ld.global.nc.b64 %rd1, [__const_$_qux_$_s+8];
; CHECK-PTX-NEXT: st.local.b64 [%SPL+8], %rd1;
; CHECK-PTX-NEXT: ld.global.nc.b64 %rd2, [__const_$_qux_$_s];
; CHECK-PTX-NEXT: st.local.b64 [%SPL], %rd2;
; CHECK-PTX-NEXT: st.local.b64 [%SPL+16], 1;
-; CHECK-PTX-NEXT: add.u64 %rd3, %SPL, 16;
-; CHECK-PTX-NEXT: cvta.local.u64 %rd4, %rd3;
+; CHECK-PTX-NEXT: add.u64 %rd3, %SP, 16;
; CHECK-PTX-NEXT: { // callseq 3, 0
; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16];
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: .param .b32 retval0;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd4;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd3;
; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd1;
; CHECK-PTX-NEXT: st.param.b64 [param0], %rd2;
; CHECK-PTX-NEXT: call.uni (retval0), variadics4, (param0, param1);
diff --git a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
index 5d9b4ee6f5297..d41b5aefec58a 100644
--- a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
+++ b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
@@ -9,7 +9,7 @@
; CHECK: .loc 1 5 3 // t.c:5:3
; CHECK: { // callseq 0, 0
; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], %rd2;
+; CHECK: st.param.b64 [param0], %rd1;
; CHECK: call.uni escape_foo, (param0);
; CHECK: } // callseq 0
; CHECK: .loc 1 6 1 // t.c:6:1
>From 0a5b4d2f0c7d33576d968c4a2d51d275b8d69e08 Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Mon, 1 Sep 2025 15:30:57 +0000
Subject: [PATCH 12/14] Address comments
---
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 4 ++--
llvm/lib/Target/NVPTX/NVPTXPeephole.cpp | 9 ++++++---
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 2 +-
3 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 43f52e72f8d5b..7a8d9cb34815b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -2257,8 +2257,8 @@ def SDTDynAllocaOp
: SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<1>, SDTCisVT<2, i32>]>;
def getAllocaAlign : SDNodeXForm<imm, [{
- if (N->getZExtValue() != 0)
- return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), N->getValueType(0));
+ if (auto NV = N->getZExtValue())
+ return CurDAG->getTargetConstant(NV, SDLoc(N), N->getValueType(0));
return CurDAG->getTargetConstant(CurDAG->getSubtarget().getFrameLowering()->getStackAlign().value(), SDLoc(N), N->getValueType(0));
}]>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp b/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
index 9223712537fd2..9c0ca5d9c5e62 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -125,9 +125,12 @@ static void CombineCVTALocal(MachineInstr &CVTALocalInstr) {
if (LeaInstr->getOpcode() == NVPTX::CVT_u64_u32) {
CVTInstr = LeaInstr;
LeaInstr = MRI.getUniqueVRegDef(LeaInstr->getOperand(1).getReg());
- assert((LeaInstr->getOpcode() == NVPTX::LEA_ADDRi64 ||
- LeaInstr->getOpcode() == NVPTX::LEA_ADDRi) &&
- "Expected LEA_ADDRi64 or LEA_ADDRi");
+ if ((LeaInstr->getOpcode() != NVPTX::LEA_ADDRi64 &&
+ LeaInstr->getOpcode() != NVPTX::LEA_ADDRi)) {
+ LLVM_DEBUG(dbgs() << "NVPTXPeephole: Expected LEA_ADDRi64 or LEA_ADDRi");
+
+ return;
+ }
}
const NVPTXRegisterInfo *NRI =
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 01b2f874da1d4..a2cf99179b403 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -356,7 +356,7 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createNVPTXLowerArgsPass());
if (getOptLevel() != CodeGenOptLevel::None)
- // NVPTXLowerArgs emits alloca for byval parameters which can often
+ // NVPTXLowerArgs may emit alloca for byval parameters which can often
// be eliminated by SROA.
addPass(createSROAPass());
addPass(createNVPTXLowerAllocaPass());
>From 24997bea47ddc576f2ad8ff1fe443913c16e9c96 Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Tue, 2 Sep 2025 13:56:10 +0000
Subject: [PATCH 13/14] Handle Data Layout better
---
clang/lib/Basic/Targets/NVPTX.cpp | 8 +--
clang/lib/CodeGen/BackendUtil.cpp | 13 ++--
llvm/include/llvm/Target/TargetMachine.h | 2 +-
llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 18 +----
llvm/lib/Target/NVPTX/NVPTXTargetMachine.h | 1 -
.../NVPTX/no-stack-protector-libcall-error.ll | 2 +
llvm/test/CodeGen/NVPTX/variadics-lowering.ll | 14 ++--
.../OpenMP/custom_state_machines.ll | 42 ++++++-----
.../Transforms/OpenMP/remove_globalization.ll | 11 +--
llvm/test/Transforms/OpenMP/spmdization.ll | 71 +++++++++++--------
.../Transforms/OpenMP/spmdization_guarding.ll | 17 +++--
.../Transforms/OpenMP/spmdization_indirect.ll | 19 ++---
...zation_no_guarding_two_reaching_kernels.ll | 21 +++---
.../NVPTX/safestack-libcall-error.ll | 2 +
14 files changed, 129 insertions(+), 112 deletions(-)
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 5cf2dc187b836..2a92cba5850c1 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -69,14 +69,14 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
HasFloat16 = true;
if (TargetPointerWidth == 32)
- resetDataLayout(
- "e-p:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
+ resetDataLayout("e-p:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:16-v32:32-"
+ "n16:32:64-A5");
else if (Opts.NVPTXUseShortPointers)
resetDataLayout(
"e-p3:32:32-p4:32:32-p5:32:32-p6:32:32-p7:32:32-i64:64-i128:128-v16:"
- "16-v32:32-n16:32:64");
+ "16-v32:32-n16:32:64-A5");
else
- resetDataLayout("e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64");
+ resetDataLayout("e-p6:32:32-i64:64-i128:128-v16:16-v32:32-n16:32:64-A5");
// If possible, get a TargetInfo for our host triple, so we can match its
// types.
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 8198f9c58340c..0b8b824fbcd5a 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1444,16 +1444,15 @@ void clang::emitBackendOutput(CompilerInstance &CI, CodeGenOptions &CGOpts,
// Verify clang's TargetInfo DataLayout against the LLVM TargetMachine's
// DataLayout.
- if (AsmHelper.TM)
- if (!AsmHelper.TM->isCompatibleDataLayout(M->getDataLayout()) ||
- !AsmHelper.TM->isCompatibleDataLayout(DataLayout(TDesc))) {
- std::string DLDesc = M->getDataLayout().getStringRepresentation();
+ if (AsmHelper.TM) {
+ std::string DLDesc = M->getDataLayout().getStringRepresentation();
+ if (DLDesc != TDesc) {
unsigned DiagID = Diags.getCustomDiagID(
- DiagnosticsEngine::Error,
- "backend data layout '%0' is not compatible with "
- "expected target description '%1'");
+ DiagnosticsEngine::Error, "backend data layout '%0' does not match "
+ "expected target description '%1'");
Diags.Report(DiagID) << DLDesc << TDesc;
}
+ }
}
// With -fembed-bitcode, save a copy of the llvm IR as data in the
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index f4cd06bf93d40..bf4e490554723 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -208,7 +208,7 @@ class LLVM_ABI TargetMachine {
/// The LLVM Module owns a DataLayout that is used for the target independent
/// optimizations and code generation. This hook provides a target specific
/// check on the validity of this DataLayout.
- virtual bool isCompatibleDataLayout(const DataLayout &Candidate) const {
+ bool isCompatibleDataLayout(const DataLayout &Candidate) const {
return DL == Candidate;
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index a2cf99179b403..1f05355604454 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -133,7 +133,7 @@ static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";
- return Ret;
+ return Ret + "-A" + std::to_string(ADDRESS_SPACE_LOCAL);
}
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
@@ -498,22 +498,6 @@ void NVPTXPassConfig::addMachineSSAOptimization() {
printAndVerify("After codegen peephole optimization pass");
}
-bool NVPTXTargetMachine::isCompatibleDataLayout(
- const DataLayout &Candidate) const {
- // XXX: Should we enforce that the Candidate DataLayout has the same address
- // space for allocas?
- if (DL == Candidate)
- return true;
-
- auto DLStr = DL.getStringRepresentation();
- if (!StringRef(DLStr).contains("A"))
- DLStr = DLStr.empty() ? "A" + std::to_string(ADDRESS_SPACE_LOCAL)
- : DLStr + "-A" + std::to_string(ADDRESS_SPACE_LOCAL);
- auto NewDL = DataLayout(DLStr);
-
- return NewDL == Candidate;
-}
-
unsigned
NVPTXTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
if (Kind == PseudoSourceValue::FixedStack) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index c2f09a89865c4..802c63ffca7f6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -77,7 +77,6 @@ class NVPTXTargetMachine : public CodeGenTargetMachineImpl {
std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const override;
- bool isCompatibleDataLayout(const DataLayout &Candidate) const override;
unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const override;
}; // NVPTXTargetMachine.
diff --git a/llvm/test/CodeGen/NVPTX/no-stack-protector-libcall-error.ll b/llvm/test/CodeGen/NVPTX/no-stack-protector-libcall-error.ll
index f877d95dd3769..177562db50789 100644
--- a/llvm/test/CodeGen/NVPTX/no-stack-protector-libcall-error.ll
+++ b/llvm/test/CodeGen/NVPTX/no-stack-protector-libcall-error.ll
@@ -1,5 +1,7 @@
; RUN: not opt -disable-output -mtriple=nvptx64-- -enable-selectiondag-sp=0 -passes=stack-protector %s 2>&1 | FileCheck %s
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+
; CHECK: error: no libcall available for stack protector
define void @func() sspreq nounwind {
%alloca = alloca i32, align 4
diff --git a/llvm/test/CodeGen/NVPTX/variadics-lowering.ll b/llvm/test/CodeGen/NVPTX/variadics-lowering.ll
index 1d69f8de2ca8c..dcabea29051b4 100644
--- a/llvm/test/CodeGen/NVPTX/variadics-lowering.ll
+++ b/llvm/test/CodeGen/NVPTX/variadics-lowering.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=nvptx64-- --passes=expand-variadics --expand-variadics-override=lowering < %s | FileCheck %s
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+
%struct.S1 = type { i32, i8, i64 }
%struct.S2 = type { i64, i64 }
@@ -330,14 +332,14 @@ entry:
define dso_local void @qux() {
; CHECK-LABEL: define dso_local void @qux() {
; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S2:%.*]], align 8
; CHECK-NEXT: [[VARARG_BUFFER:%.*]] = alloca [[QUX_VARARG:%.*]], align 8
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[S]], ptr align 8 @__const.qux.s, i64 16, i1 false)
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[VARARG_BUFFER]])
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[QUX_VARARG]], ptr [[VARARG_BUFFER]], i32 0, i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = alloca [[QUX_VARARG1:%.*]], align 8
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[VARARG_BUFFER]], ptr align 8 @__const.qux.s, i64 16, i1 false)
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[TMP1]])
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[QUX_VARARG1]], ptr [[TMP1]], i32 0, i32 0
; CHECK-NEXT: store i64 1, ptr [[TMP0]], align 8
-; CHECK-NEXT: [[CALL:%.*]] = call i32 @variadics4(ptr noundef byval([[STRUCT_S2]]) align 8 [[S]], ptr [[VARARG_BUFFER]])
-; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[VARARG_BUFFER]])
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @variadics4(ptr noundef byval([[QUX_VARARG]]) align 8 [[VARARG_BUFFER]], ptr [[TMP1]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[TMP1]])
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
index 2fe28daf304a6..1a012b231bd06 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
@@ -2069,7 +2069,7 @@ attributes #9 = { convergent nounwind willreturn memory(read) }
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_l22
; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -2085,8 +2085,9 @@ attributes #9 = { convergent nounwind willreturn memory(read) }
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX: worker_state_machine.finished:
@@ -2189,7 +2190,7 @@ attributes #9 = { convergent nounwind willreturn memory(read) }
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_interprocedural_l39
; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -2205,8 +2206,9 @@ attributes #9 = { convergent nounwind willreturn memory(read) }
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX: worker_state_machine.finished:
@@ -2335,7 +2337,7 @@ attributes #9 = { convergent nounwind willreturn memory(read) }
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_with_fallback_l55
; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -2351,8 +2353,9 @@ attributes #9 = { convergent nounwind willreturn memory(read) }
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX: worker_state_machine.finished:
@@ -2456,7 +2459,7 @@ attributes #9 = { convergent nounwind willreturn memory(read) }
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_no_openmp_attr_l66
; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -2472,8 +2475,9 @@ attributes #9 = { convergent nounwind willreturn memory(read) }
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX: worker_state_machine.finished:
@@ -2575,7 +2579,7 @@ attributes #9 = { convergent nounwind willreturn memory(read) }
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_simple_state_machine_pure_l77
; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -2591,8 +2595,9 @@ attributes #9 = { convergent nounwind willreturn memory(read) }
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX: worker_state_machine.finished:
@@ -2766,7 +2771,7 @@ attributes #9 = { convergent nounwind willreturn memory(read) }
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
; NVPTX-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -2782,8 +2787,9 @@ attributes #9 = { convergent nounwind willreturn memory(read) }
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX: worker_state_machine.finished:
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index 419d3d07c2bea..eb3eab3162f95 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -82,7 +82,8 @@ define internal void @foo() {
; CHECK-DISABLED-LABEL: define {{[^@]+}}@foo
; CHECK-DISABLED-SAME: () #[[ATTR1]] {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
+; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
+; CHECK-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTH2S]] to ptr
; CHECK-DISABLED-NEXT: ret void
;
entry:
@@ -103,8 +104,9 @@ define internal void @bar() {
; CHECK-DISABLED-LABEL: define {{[^@]+}}@bar
; CHECK-DISABLED-SAME: () #[[ATTR1]] {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
-; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[DOTH2S]]) #[[ATTR5:[0-9]+]], !dbg [[DBG7:![0-9]+]]
+; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
+; CHECK-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTH2S]] to ptr
+; CHECK-DISABLED-NEXT: call void @share(ptr nofree [[MALLOC_CAST]]) #[[ATTR5:[0-9]+]], !dbg [[DBG7:![0-9]+]]
; CHECK-DISABLED-NEXT: ret void
;
entry:
@@ -180,7 +182,7 @@ define internal void @convert_and_move_alloca() {
; CHECK-DISABLED-LABEL: define {{[^@]+}}@convert_and_move_alloca
; CHECK-DISABLED-SAME: () #[[ATTR1]] {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
+; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; CHECK-DISABLED-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[UB_PTR:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[UB_PTR]] to ptr addrspace(5)
@@ -190,6 +192,7 @@ define internal void @convert_and_move_alloca() {
; CHECK-DISABLED-NEXT: store i32 0, ptr addrspace(5) [[TMP1]], align 4
; CHECK-DISABLED-NEXT: br label [[LOOPBODY:%.*]]
; CHECK-DISABLED: loopbody:
+; CHECK-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTH2S]] to ptr
; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
; CHECK-DISABLED-NEXT: [[IV:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4
; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[IV]], 10
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
index 0272c41d9d1fc..fc239d722506c 100644
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -334,7 +334,7 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_l5__debug
; NVPTX-DISABLED1-SAME: () #[[ATTR1:[0-9]+]] {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -350,8 +350,9 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug()
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED1: worker_state_machine.begin:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED1: worker_state_machine.finished:
@@ -816,7 +817,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20
; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -832,8 +833,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED1: worker_state_machine.begin:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED1: worker_state_machine.finished:
@@ -932,10 +934,11 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
+; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; NVPTX-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
+; NVPTX-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
+; NVPTX-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -1000,10 +1003,11 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-DISABLED1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
+; NVPTX-DISABLED1-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; NVPTX-DISABLED1-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
+; NVPTX-DISABLED1-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
+; NVPTX-DISABLED1-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
; NVPTX-DISABLED1-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED1: for.cond:
; NVPTX-DISABLED1-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -1022,10 +1026,11 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias %
; NVPTX-DISABLED2-LABEL: define {{[^@]+}}@__omp_outlined__2
; NVPTX-DISABLED2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
; NVPTX-DISABLED2-NEXT: entry:
-; NVPTX-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
+; NVPTX-DISABLED2-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED2-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; NVPTX-DISABLED2-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR7]]
+; NVPTX-DISABLED2-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
+; NVPTX-DISABLED2-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR7]]
; NVPTX-DISABLED2-NEXT: br label [[FOR_COND:%.*]]
; NVPTX-DISABLED2: for.cond:
; NVPTX-DISABLED2-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -1316,7 +1321,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35
; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -1332,8 +1337,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED1: worker_state_machine.begin:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED1: worker_state_machine.finished:
@@ -1842,7 +1848,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50
; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -1858,8 +1864,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_sequential_loop_to_s
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED1: worker_state_machine.begin:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED1: worker_state_machine.finished:
@@ -2339,7 +2346,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -2355,8 +2362,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX: worker_state_machine.finished:
@@ -2452,7 +2460,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65
; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-DISABLED1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -2468,8 +2476,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_targe
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED1: worker_state_machine.begin:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED1: worker_state_machine.finished:
@@ -2636,7 +2645,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_
; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; NVPTX-SAME: () #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
@@ -2650,8 +2659,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX: worker_state_machine.finished:
@@ -2761,7 +2771,7 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_
; NVPTX-DISABLED1-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74
; NVPTX-DISABLED1-SAME: () #[[ATTR0]] {
; NVPTX-DISABLED1-NEXT: entry:
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-DISABLED1-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
; NVPTX-DISABLED1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_kernel_environment, ptr null)
@@ -2775,8 +2785,9 @@ define weak ptx_kernel void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_
; NVPTX-DISABLED1-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX-DISABLED1: worker_state_machine.begin:
; NVPTX-DISABLED1-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-DISABLED1-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-DISABLED1-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-DISABLED1-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-DISABLED1-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX-DISABLED1: worker_state_machine.finished:
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
index d057e5b233e87..f2a69bbfb722e 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -61,7 +61,7 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2a_fbfa7a_sequential_loop_l6
; CHECK-SAME: (ptr [[DYN:%.*]], ptr [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[HEAP2STACK_H2S:%.*]] = alloca i8, i64 8, align 8
+; CHECK-NEXT: [[HEAP2STACK_H2S:%.*]] = alloca i8, i64 8, align 8, addrspace(5)
; CHECK-NEXT: [[LOC:%.*]] = alloca ptr, align 8
; CHECK-NEXT: [[AL32:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
@@ -96,7 +96,8 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP3]])
; CHECK-NEXT: br label [[REGION_EXIT:%.*]]
; CHECK: region.exit:
-; CHECK-NEXT: call void @usei8ptr(ptr captures(none) [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
+; CHECK-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[HEAP2STACK_H2S]] to ptr
+; CHECK-NEXT: call void @usei8ptr(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR9:[0-9]+]]
; CHECK-NEXT: br label [[FOR_COND_I:%.*]]
; CHECK: for.cond.i:
; CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 2, [[REGION_EXIT]] ], [ [[INC_I:%.*]], [[REGION_EXIT3:%.*]] ]
@@ -192,8 +193,8 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_2a_fbfa7a_sequential_loop_l6
; CHECK-DISABLED-SAME: (ptr [[DYN:%.*]], ptr [[X:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-DISABLED-NEXT: entry:
-; CHECK-DISABLED-NEXT: [[HEAP2STACK_H2S:%.*]] = alloca i8, i64 8, align 8
-; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-DISABLED-NEXT: [[HEAP2STACK_H2S:%.*]] = alloca i8, i64 8, align 8, addrspace(5)
+; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; CHECK-DISABLED-NEXT: [[LOC:%.*]] = alloca ptr, align 8
; CHECK-DISABLED-NEXT: [[AL32:%.*]] = alloca i32, align 4
; CHECK-DISABLED-NEXT: [[N_ADDR_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[N]] to i32
@@ -208,8 +209,9 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-DISABLED-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; CHECK-DISABLED: worker_state_machine.begin:
; CHECK-DISABLED-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; CHECK-DISABLED-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; CHECK-DISABLED-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; CHECK-DISABLED-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; CHECK-DISABLED-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; CHECK-DISABLED: worker_state_machine.finished:
@@ -247,7 +249,8 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK-DISABLED-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
; CHECK-DISABLED-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
-; CHECK-DISABLED-NEXT: call void @usei8ptr(ptr captures(none) [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
+; CHECK-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[HEAP2STACK_H2S]] to ptr
+; CHECK-DISABLED-NEXT: call void @usei8ptr(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR9:[0-9]+]]
; CHECK-DISABLED-NEXT: br label [[FOR_COND_I:%.*]]
; CHECK-DISABLED: for.cond.i:
; CHECK-DISABLED-NEXT: [[I_0_I:%.*]] = phi i32 [ 2, [[USER_CODE_ENTRY]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ]
diff --git a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
index d1e006a704441..845893ea795bd 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_indirect.ll
@@ -279,10 +279,11 @@ define internal void @__omp_outlined_spmd_amenable2(ptr noalias %.global_tid., p
; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined_spmd_amenable2
; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4
+; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS_CAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
-; NVPTX-NEXT: call void @use(ptr captures(none) [[X_H2S]]) #[[ATTR6]]
+; NVPTX-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr
+; NVPTX-NEXT: call void @use(ptr captures(none) [[MALLOC_CAST]]) #[[ATTR6]]
; NVPTX-NEXT: br label [[FOR_COND:%.*]]
; NVPTX: for.cond:
; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
@@ -449,7 +450,7 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 {
; NVPTX-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callee
; NVPTX-SAME: (i1 [[C:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -465,8 +466,9 @@ define weak ptx_kernel void @spmd_and_non_spmd_callee(i1 %c) #0 {
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX: worker_state_machine.finished:
@@ -804,7 +806,7 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
; NVPTX-LABEL: define {{[^@]+}}@spmd_and_non_spmd_callees_metadata
; NVPTX-SAME: (ptr [[FP:%.*]]) #[[ATTR0]] {
; NVPTX-NEXT: entry:
-; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NVPTX-NEXT: [[DOTZERO_ADDR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca ptr, align 8, addrspace(5)
@@ -820,8 +822,9 @@ define weak ptx_kernel void @spmd_and_non_spmd_callees_metadata(ptr %fp) #0 {
; NVPTX-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; NVPTX: worker_state_machine.begin:
; NVPTX-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; NVPTX-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; NVPTX-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; NVPTX-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; NVPTX-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; NVPTX: worker_state_machine.finished:
diff --git a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
index 17e68030d1813..5a753c3106106 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll
@@ -73,7 +73,7 @@ define weak ptx_kernel void @__omp_offloading_2b_10393b5_spmd_l12(ptr %dyn) #0 {
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_spmd_l12
; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[DYN:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
-; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_spmd_l12_kernel_environment, ptr [[DYN]])
; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -85,8 +85,9 @@ define weak ptx_kernel void @__omp_offloading_2b_10393b5_spmd_l12(ptr %dyn) #0 {
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.begin:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.finished:
@@ -150,7 +151,7 @@ define weak ptx_kernel void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #
; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20
; CHECK-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment, ptr [[DYN]])
; CHECK-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -162,8 +163,9 @@ define weak ptx_kernel void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #
; CHECK-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; CHECK: worker_state_machine.begin:
; CHECK-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; CHECK-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; CHECK-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; CHECK-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; CHECK-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; CHECK-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; CHECK: worker_state_machine.finished:
@@ -192,7 +194,7 @@ define weak ptx_kernel void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #
; CHECK-DISABLE-SPMDIZATION-LABEL: define {{[^@]+}}@__omp_offloading_2b_10393b5_generic_l20
; CHECK-DISABLE-SPMDIZATION-SAME: (ptr [[DYN:%.*]]) #[[ATTR0]] {
; CHECK-DISABLE-SPMDIZATION-NEXT: entry:
-; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; CHECK-DISABLE-SPMDIZATION-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @__omp_offloading_2b_10393b5_generic_l20_kernel_environment, ptr [[DYN]])
; CHECK-DISABLE-SPMDIZATION-NEXT: [[THREAD_IS_WORKER:%.*]] = icmp ne i32 [[TMP0]], -1
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_WORKER]], label [[IS_WORKER_CHECK:%.*]], label [[THREAD_USER_CODE_CHECK:%.*]]
@@ -204,8 +206,9 @@ define weak ptx_kernel void @__omp_offloading_2b_10393b5_generic_l20(ptr %dyn) #
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[THREAD_IS_MAIN_OR_WORKER]], label [[WORKER_STATE_MACHINE_BEGIN:%.*]], label [[WORKER_STATE_MACHINE_FINISHED:%.*]]
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.begin:
; CHECK-DISABLE-SPMDIZATION-NEXT: call void @__kmpc_barrier_simple_generic(ptr @[[GLOB1]], i32 [[TMP0]])
-; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR]])
-; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR]], align 8
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN_ADDR_GENERIC:%.*]] = addrspacecast ptr addrspace(5) [[WORKER_WORK_FN_ADDR]] to ptr
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_ACTIVE:%.*]] = call i1 @__kmpc_kernel_parallel(ptr [[WORKER_WORK_FN_ADDR_GENERIC]])
+; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_WORK_FN:%.*]] = load ptr, ptr [[WORKER_WORK_FN_ADDR_GENERIC]], align 8
; CHECK-DISABLE-SPMDIZATION-NEXT: [[WORKER_IS_DONE:%.*]] = icmp eq ptr [[WORKER_WORK_FN]], null
; CHECK-DISABLE-SPMDIZATION-NEXT: br i1 [[WORKER_IS_DONE]], label [[WORKER_STATE_MACHINE_FINISHED]], label [[WORKER_STATE_MACHINE_IS_ACTIVE_CHECK:%.*]]
; CHECK-DISABLE-SPMDIZATION: worker_state_machine.finished:
diff --git a/llvm/test/Transforms/SafeStack/NVPTX/safestack-libcall-error.ll b/llvm/test/Transforms/SafeStack/NVPTX/safestack-libcall-error.ll
index a17a9dd867e3a..26d98cd421ff1 100644
--- a/llvm/test/Transforms/SafeStack/NVPTX/safestack-libcall-error.ll
+++ b/llvm/test/Transforms/SafeStack/NVPTX/safestack-libcall-error.ll
@@ -1,5 +1,7 @@
; RUN: not opt -disable-output -mtriple=nvptx64-- -mcpu=sm_90 -passes=safe-stack %s 2>&1 | FileCheck %s
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+
; CHECK: error: no libcall available for stackprotector check fail
define void @foo(i32 %t) #0 {
%vla = alloca i32, i32 %t, align 4
>From 2072fa9376570100dac522d8f572061cd1415ed4 Mon Sep 17 00:00:00 2001
From: Theodoros Theodoridis <ttheodoridis at nvidia.com>
Date: Tue, 2 Sep 2025 17:10:05 +0000
Subject: [PATCH 14/14] Update OpenMP tests
---
.../declare_target_codegen_globalization.cpp | 53 +-
clang/test/OpenMP/nvptx_NRVO_variable.cpp | 9 +-
clang/test/OpenMP/nvptx_SPMD_codegen.cpp | 28803 +++++++++-------
clang/test/OpenMP/nvptx_allocate_codegen.cpp | 57 +-
clang/test/OpenMP/nvptx_data_sharing.cpp | 123 +-
...stribute_parallel_generic_mode_codegen.cpp | 814 +-
.../nvptx_multi_target_parallel_codegen.cpp | 124 +-
.../OpenMP/nvptx_nested_parallel_codegen.cpp | 244 +-
clang/test/OpenMP/nvptx_parallel_codegen.cpp | 588 +-
.../OpenMP/nvptx_parallel_for_codegen.cpp | 149 +-
clang/test/OpenMP/nvptx_target_codegen.cpp | 694 +-
.../OpenMP/nvptx_target_parallel_codegen.cpp | 208 +-
...tx_target_parallel_num_threads_codegen.cpp | 222 +-
...vptx_target_parallel_proc_bind_codegen.cpp | 906 +-
...vptx_target_parallel_reduction_codegen.cpp | 2031 +-
...arallel_reduction_codegen_tbaa_PR46146.cpp | 1220 +-
.../test/OpenMP/nvptx_target_simd_codegen.cpp | 1464 +-
.../OpenMP/nvptx_target_teams_codegen.cpp | 392 +-
.../nvptx_target_teams_distribute_codegen.cpp | 250 +-
..._teams_distribute_parallel_for_codegen.cpp | 5769 ++--
...bute_parallel_for_generic_mode_codegen.cpp | 754 +-
...s_distribute_parallel_for_simd_codegen.cpp | 2632 +-
...x_target_teams_distribute_simd_codegen.cpp | 4014 ++-
...vptx_target_teams_generic_loop_codegen.cpp | 4671 +--
...eams_generic_loop_generic_mode_codegen.cpp | 342 +-
.../nvptx_target_teams_ompx_bare_codegen.cpp | 43 +-
clang/test/OpenMP/nvptx_teams_codegen.cpp | 368 +-
.../OpenMP/nvptx_teams_reduction_codegen.cpp | 3984 ++-
.../OpenMP/nvptx_unsupported_type_codegen.cpp | 9 +-
clang/test/OpenMP/reduction_complex.c | 796 +-
clang/test/OpenMP/reduction_implicit_map.cpp | 156 +-
.../OpenMP/target_parallel_debug_codegen.cpp | 1308 +-
.../target_parallel_for_debug_codegen.cpp | 1719 +-
...target_parallel_generic_loop_codegen-3.cpp | 1719 +-
34 files changed, 37515 insertions(+), 29120 deletions(-)
diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp
index b3c810b4b1455..69c1aa8e267d6 100644
--- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp
+++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp
@@ -27,20 +27,23 @@ int maini1() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -50,15 +53,19 @@ int maini1() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6maini1v_l16_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[B]]) #[[ATTR7:[0-9]+]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[B_ASCAST]]) #[[ATTR7:[0-9]+]]
// CHECK1-NEXT: [[CALL1:%.*]] = call noundef i32 @_Z3barv() #[[ATTR7]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CALL]], [[CALL1]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
@@ -68,9 +75,12 @@ int maini1() {
// CHECK1-LABEL: define {{[^@]+}}@_Z3fooRi
// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK1-NEXT: ret i32 [[TMP1]]
//
@@ -78,6 +88,9 @@ int maini1() {
// CHECK1-LABEL: define {{[^@]+}}@_Z3barv
// CHECK1-SAME: () #[[ATTR2]] {
// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[CLEANUP_DEST_SLOT:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK1-NEXT: [[A:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooRi(ptr noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR7]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 4)
diff --git a/clang/test/OpenMP/nvptx_NRVO_variable.cpp b/clang/test/OpenMP/nvptx_NRVO_variable.cpp
index 010b43c2f5dab..932b8d96ba93c 100644
--- a/clang/test/OpenMP/nvptx_NRVO_variable.cpp
+++ b/clang/test/OpenMP/nvptx_NRVO_variable.cpp
@@ -14,13 +14,14 @@ struct S {
void bar(S &);
// CHECK-LABEL: foo
S foo() {
- // CHECK: [[RETVAL:%.+]] = alloca %struct.S,
+ // CHECK: [[RETVAL:%.*]] = alloca [[STRUCT_S:%.*]], align 4, addrspace(5)
+ // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
S s;
// CHECK: call void @{{.+}}bar{{.+}}(ptr {{.*}}[[S_REF:%.+]])
bar(s);
- // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[RETVAL]], ptr {{.*}}[[S_REF]], i64 4, i1 false)
- // CHECK: [[VAL:%.+]] = load %struct.S, ptr [[RETVAL]],
- // CHECK: ret %struct.S [[VAL]]
+ // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[RETVAL_ASCAST]], ptr align 4 [[S]], i64 4, i1 false)
+ // CHECK: [[TMP0:%.*]] = load [[STRUCT_S]], ptr [[RETVAL_ASCAST]], align 4
+ // CHECK: ret [[STRUCT_S]] [[TMP0]]
return s;
}
#pragma omp end declare target
diff --git a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp
index 54f7694b55da3..ce7c425494319 100644
--- a/clang/test/OpenMP/nvptx_SPMD_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_SPMD_codegen.cpp
@@ -275,26 +275,31 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-64-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK-64-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]]
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-64-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-64-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-64-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 1
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -304,174 +309,188 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED15:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS17:%.*]] = alloca [3 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED15:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS17:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__CASTED15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED15]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS17]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-64-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK-64-NEXT: br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK-64: omp_if.then:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP221:![0-9]+]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP11]] to i1
-// CHECK-64-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8
-// CHECK-64-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: [[LOADEDV2:%.*]] = trunc i8 [[TMP11]] to i1
+// CHECK-64-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV2]] to i8
+// CHECK-64-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 1, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 8, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK-64-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK-64-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP12]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP19]] to i1
-// CHECK-64-NEXT: [[TMP20:%.*]] = zext i1 [[TOBOOL3]] to i32
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP20]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3), !llvm.access.group [[ACC_GRP221]]
+// CHECK-64-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: [[LOADEDV3:%.*]] = trunc i8 [[TMP19]] to i1
+// CHECK-64-NEXT: [[TMP20:%.*]] = zext i1 [[LOADEDV3]] to i32
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP20]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3), !llvm.access.group [[ACC_GRP67]]
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
-// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP25]], [[TMP26]]
-// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP27]], 9
// CHECK-64-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]]
// CHECK-64: cond.true7:
// CHECK-64-NEXT: br label [[COND_END9:%.*]]
// CHECK-64: cond.false8:
-// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-64-NEXT: br label [[COND_END9]]
// CHECK-64: cond.end9:
// CHECK-64-NEXT: [[COND10:%.*]] = phi i32 [ 9, [[COND_TRUE7]] ], [ [[TMP28]], [[COND_FALSE8]] ]
-// CHECK-64-NEXT: store i32 [[COND10]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP222:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[COND10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: store i32 [[TMP29]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK-64: omp_if.else:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND11:%.*]]
// CHECK-64: omp.inner.for.cond11:
-// CHECK-64-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP12:%.*]] = icmp slt i32 [[TMP30]], 10
// CHECK-64-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY13:%.*]], label [[OMP_INNER_FOR_END28:%.*]]
// CHECK-64: omp.inner.for.body13:
-// CHECK-64-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP32:%.*]] = zext i32 [[TMP31]] to i64
-// CHECK-64-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP34:%.*]] = zext i32 [[TMP33]] to i64
-// CHECK-64-NEXT: [[TMP35:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-64-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP35]] to i1
-// CHECK-64-NEXT: [[FROMBOOL16:%.*]] = zext i1 [[TOBOOL14]] to i8
-// CHECK-64-NEXT: store i8 [[FROMBOOL16]], ptr [[DOTCAPTURE_EXPR__CASTED15]], align 1
-// CHECK-64-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED15]], align 8
-// CHECK-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP35:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-64-NEXT: [[LOADEDV14:%.*]] = trunc i8 [[TMP35]] to i1
+// CHECK-64-NEXT: [[STOREDV16:%.*]] = zext i1 [[LOADEDV14]] to i8
+// CHECK-64-NEXT: store i8 [[STOREDV16]], ptr [[DOTCAPTURE_EXPR__CASTED15_ASCAST]], align 1
+// CHECK-64-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED15_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP38:%.*]] = inttoptr i64 [[TMP32]] to ptr
// CHECK-64-NEXT: store ptr [[TMP38]], ptr [[TMP37]], align 8
-// CHECK-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP40:%.*]] = inttoptr i64 [[TMP34]] to ptr
// CHECK-64-NEXT: store ptr [[TMP40]], ptr [[TMP39]], align 8
-// CHECK-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17]], i64 0, i64 2
+// CHECK-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17_ASCAST]], i64 0, i64 2
// CHECK-64-NEXT: [[TMP42:%.*]] = inttoptr i64 [[TMP36]] to ptr
// CHECK-64-NEXT: store ptr [[TMP42]], ptr [[TMP41]], align 8
-// CHECK-64-NEXT: [[TMP43:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-64-NEXT: [[TOBOOL18:%.*]] = trunc i8 [[TMP43]] to i1
-// CHECK-64-NEXT: [[TMP44:%.*]] = zext i1 [[TOBOOL18]] to i32
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP44]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1, ptr null, ptr [[CAPTURED_VARS_ADDRS17]], i64 3)
+// CHECK-64-NEXT: [[TMP43:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-64-NEXT: [[LOADEDV18:%.*]] = trunc i8 [[TMP43]] to i1
+// CHECK-64-NEXT: [[TMP44:%.*]] = zext i1 [[LOADEDV18]] to i32
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP44]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1, ptr null, ptr [[CAPTURED_VARS_ADDRS17_ASCAST]], i64 3)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]]
// CHECK-64: omp.inner.for.inc19:
-// CHECK-64-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP45]], [[TMP46]]
-// CHECK-64-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP47]], [[TMP48]]
-// CHECK-64-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP49]], [[TMP50]]
-// CHECK-64-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP51:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP23:%.*]] = icmp sgt i32 [[TMP51]], 9
// CHECK-64-NEXT: br i1 [[CMP23]], label [[COND_TRUE24:%.*]], label [[COND_FALSE25:%.*]]
// CHECK-64: cond.true24:
// CHECK-64-NEXT: br label [[COND_END26:%.*]]
// CHECK-64: cond.false25:
-// CHECK-64-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP52:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END26]]
// CHECK-64: cond.end26:
// CHECK-64-NEXT: [[COND27:%.*]] = phi i32 [ 9, [[COND_TRUE24]] ], [ [[TMP52]], [[COND_FALSE25]] ]
-// CHECK-64-NEXT: store i32 [[COND27]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP53]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND11]], !llvm.loop [[LOOP225:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[COND27]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP53:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP53]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND11]], !llvm.loop [[LOOP71:![0-9]+]]
// CHECK-64: omp.inner.for.end28:
// CHECK-64-NEXT: br label [[OMP_IF_END]]
// CHECK-64: omp_if.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP54:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0
// CHECK-64-NEXT: br i1 [[TMP55]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -480,105 +499,117 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-64-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-64-NEXT: br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK-64: omp_if.then:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP227:![0-9]+]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]]
// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP227]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP73]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP227]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP227]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP227]]
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP227]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP227]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP228:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK-64: omp_if.else:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND4:%.*]]
// CHECK-64: omp.inner.for.cond4:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CONV5:%.*]] = sext i32 [[TMP14]] to i64
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP15]]
// CHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY7:%.*]], label [[OMP_INNER_FOR_END13:%.*]]
// CHECK-64: omp.inner.for.body7:
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]]
-// CHECK-64-NEXT: store i32 [[ADD9]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD9]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE10:%.*]]
// CHECK-64: omp.body.continue10:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC11:%.*]]
// CHECK-64: omp.inner.for.inc11:
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK-64-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP230:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP76:![0-9]+]]
// CHECK-64: omp.inner.for.end13:
// CHECK-64-NEXT: br label [[OMP_IF_END]]
// CHECK-64: omp_if.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
-// CHECK-64-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-64-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -587,105 +618,117 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-64-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-64-NEXT: br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK-64: omp_if.then:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77:![0-9]+]]
// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP231]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP77]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP231]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]]
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP231]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP232:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP78:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK-64: omp_if.else:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND4:%.*]]
// CHECK-64: omp.inner.for.cond4:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CONV5:%.*]] = sext i32 [[TMP14]] to i64
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP15]]
// CHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY7:%.*]], label [[OMP_INNER_FOR_END13:%.*]]
// CHECK-64: omp.inner.for.body7:
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 0, [[MUL8]]
-// CHECK-64-NEXT: store i32 [[ADD9]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD9]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE10:%.*]]
// CHECK-64: omp.body.continue10:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC11:%.*]]
// CHECK-64: omp.inner.for.inc11:
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK-64-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP234:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP80:![0-9]+]]
// CHECK-64: omp.inner.for.end13:
// CHECK-64-NEXT: br label [[OMP_IF_END]]
// CHECK-64: omp_if.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
-// CHECK-64-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-64-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -694,18 +737,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -715,93 +761,103 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP235:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81:![0-9]+]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP235]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP81]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP235]]
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP235]]
+// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP81]]
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2), !llvm.access.group [[ACC_GRP81]]
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP236:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP82:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK-64-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -810,75 +866,86 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP238:![0-9]+]]
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP238]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84:![0-9]+]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84]]
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP238]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP238]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP238]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP238]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP239:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP85:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK-64-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -887,18 +954,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -908,93 +978,103 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP241:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87:![0-9]+]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP241]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP87]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP241]]
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP241]]
+// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP87]]
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2), !llvm.access.group [[ACC_GRP87]]
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP242:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP88:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK-64-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -1003,66 +1083,77 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP244:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90:![0-9]+]]
// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP244]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP90]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP244]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP244]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP244]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP244]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP244]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP245:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP91:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
// CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -1071,18 +1162,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -1092,93 +1186,103 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP247:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93:![0-9]+]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP247]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP93]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP247]]
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP247]]
+// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP93]]
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2), !llvm.access.group [[ACC_GRP93]]
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP248:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP94:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK-64-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -1187,74 +1291,85 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP250:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP250]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP250]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP250]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP250]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP250]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP251:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP97:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -1263,18 +1378,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -1284,93 +1402,103 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP253:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99:![0-9]+]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP253]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP99]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP253]]
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP253]]
+// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP99]]
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2), !llvm.access.group [[ACC_GRP99]]
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP254:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP100:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK-64-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -1379,74 +1507,85 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP256:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP256]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP256]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP256]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP256]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP256]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP257:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP103:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -1455,18 +1594,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -1476,93 +1618,103 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP259:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105:![0-9]+]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP259]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP105]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP259]]
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP259]]
+// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP105]]
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2), !llvm.access.group [[ACC_GRP105]]
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP260:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP106:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK-64-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -1571,74 +1723,85 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP262:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP262]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP262]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP262]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP262]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP262]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP263:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP109:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -1647,18 +1810,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -1668,93 +1834,103 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP265:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111:![0-9]+]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP265]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP111]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP265]]
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP265]]
+// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP111]]
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2), !llvm.access.group [[ACC_GRP111]]
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP266:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP112:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK-64-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -1763,74 +1939,85 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP268:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP268]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP268]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP268]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP268]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP268]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP269:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP115:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -1839,24 +2026,29 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-64-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]]
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[A_CASTED_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP3]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -1866,104 +2058,116 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[A1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK-64-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: store i32 [[TMP10]], ptr [[A_CASTED_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8
-// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK-64-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP11]] to ptr
// CHECK-64-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], 9
// CHECK-64-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]]
// CHECK-64: cond.true6:
// CHECK-64-NEXT: br label [[COND_END8:%.*]]
// CHECK-64: cond.false7:
-// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END8]]
// CHECK-64: cond.end8:
// CHECK-64-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP25]], [[COND_FALSE7]] ]
-// CHECK-64-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK-64-NEXT: br i1 [[TMP28]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK-64: .omp.lastprivate.then:
// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK-64-NEXT: store i32 [[TMP29]], ptr [[A_ADDR]], align 4
+// CHECK-64-NEXT: store i32 [[TMP29]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK-64: .omp.lastprivate.done:
// CHECK-64-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i64 4)
@@ -1973,72 +2177,85 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[A2:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[A2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[A2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A2]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CONV3:%.*]] = sext i32 [[TMP5]] to i64
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4
-// CHECK-64-NEXT: store i32 [[TMP8]], ptr [[A2]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP8]], ptr [[A2_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK-64-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK-64: .omp.lastprivate.then:
-// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[A2]], align 4
-// CHECK-64-NEXT: store i32 [[TMP13]], ptr [[A_ADDR]], align 4
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[A2_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP13]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK-64: .omp.lastprivate.done:
// CHECK-64-NEXT: ret void
@@ -2047,18 +2264,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -2068,83 +2288,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -2156,65 +2386,76 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -2226,18 +2467,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -2247,83 +2491,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -2335,56 +2589,67 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -2396,18 +2661,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -2417,83 +2685,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -2505,63 +2783,74 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP271:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP271]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP271]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP271]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP271]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP271]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP272:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP118:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -2574,18 +2863,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -2595,83 +2887,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -2683,63 +2985,74 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP274:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP274]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP274]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP274]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP274]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP274]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP275:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP121:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -2752,18 +3065,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -2773,83 +3089,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -2861,63 +3187,74 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP277:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP277]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP277]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP277]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP277]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP277]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP278:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP124:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -2930,18 +3267,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -2951,83 +3291,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -3039,63 +3389,74 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP280:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP280]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP280]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP280]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP280]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP280]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP281:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP127:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -3108,18 +3469,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -3129,94 +3493,105 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[B:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[B:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP283:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129:![0-9]+]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP283]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP129]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP283]]
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP283]]
+// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP129]]
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2), !llvm.access.group [[ACC_GRP129]]
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP284:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP130:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK-64-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -3225,66 +3600,77 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP286:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132:![0-9]+]]
// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP286]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP132]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP286]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP286]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP286]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP286]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP286]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP287:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP133:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
// CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -3293,18 +3679,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -3314,95 +3703,106 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[B:%.*]] = alloca [3 x i32], align 4
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B]], ptr align 4 @"__const.<captured>.b", i64 12, i1 false)
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[B:%.*]] = alloca [3 x i32], align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B_ASCAST]], ptr align 4 @"__const.<captured>.b", i64 12, i1 false)
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP289:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135:![0-9]+]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP289]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP135]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP289]]
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP289]]
+// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP135]]
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2), !llvm.access.group [[ACC_GRP135]]
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP290:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP136:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK-64-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -3411,75 +3811,86 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP292:![0-9]+]]
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP292]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138:![0-9]+]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138]]
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP292]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP292]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP292]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP292]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP293:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP139:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK-64-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -3488,18 +3899,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -3509,74 +3923,84 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
// CHECK-64-NEXT: [[B:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP295:![0-9]+]]
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP295]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141:![0-9]+]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP295]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
// CHECK-64-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP295]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
// CHECK-64-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP295]]
-// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP141]]
+// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP295]]
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP295]]
+// CHECK-64-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP141]]
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2), !llvm.access.group [[ACC_GRP141]]
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP295]]
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP295]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP295]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP296:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP142:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
// CHECK-64-NEXT: br i1 [[TMP18]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: store ptr [[B]], ptr [[C]], align 8
@@ -3588,66 +4012,77 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP298:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]]
// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP298]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP144]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP298]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP298]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP298]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP298]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP298]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP299:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP145:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
// CHECK-64-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -3656,38 +4091,45 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined_wrapper
// CHECK-64-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-64-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK-64-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], i64 [[TMP6]]) #[[ATTR2]]
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], i64 [[TMP6]]) #[[ATTR2]]
// CHECK-64-NEXT: ret void
//
//
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -3697,93 +4139,103 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP301:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP301]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP147]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP301]]
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP301]]
+// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP147]]
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2), !llvm.access.group [[ACC_GRP147]]
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP302:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP148:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK-64-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -3792,74 +4244,85 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP304:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP304]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP304]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP304]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP304]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP304]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP305:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP151:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -3868,18 +4331,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -3889,93 +4355,103 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP307:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP307]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP153]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP307]]
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP307]]
+// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP153]]
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2), !llvm.access.group [[ACC_GRP153]]
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP308:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP154:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK-64-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -3984,74 +4460,85 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP310:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP310]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP310]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP310]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP310]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP310]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP311:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP157:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -4060,18 +4547,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -4081,93 +4571,103 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP313:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP313]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP159]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP313]]
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP313]]
+// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP159]]
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2), !llvm.access.group [[ACC_GRP159]]
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP314:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP160:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK-64-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -4176,74 +4676,85 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP316:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP316]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP316]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP316]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP316]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP316]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP317:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP163:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -4252,18 +4763,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -4273,93 +4787,103 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP319:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP319]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8, !llvm.access.group [[ACC_GRP165]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
-// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP319]]
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2), !llvm.access.group [[ACC_GRP319]]
+// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8, !llvm.access.group [[ACC_GRP165]]
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2), !llvm.access.group [[ACC_GRP165]]
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP320:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP166:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK-64-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -4368,74 +4892,85 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP322:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP322]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP322]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP322]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP322]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP322]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP323:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP169:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-64-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -4444,18 +4979,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -4465,83 +5003,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -4553,56 +5101,67 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -4614,18 +5173,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -4635,83 +5197,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -4723,65 +5295,76 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -4793,18 +5376,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -4814,83 +5400,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -4902,56 +5498,67 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -4963,18 +5570,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -4984,83 +5594,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -5072,63 +5692,74 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP325:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP325]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP325]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP325]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP325]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP325]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP326:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP172:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -5141,18 +5772,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -5162,83 +5796,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -5250,63 +5894,74 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP328:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP328]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP328]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP328]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP328]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP328]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP329:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP175:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -5319,18 +5974,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -5340,83 +5998,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -5428,63 +6096,74 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP331:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP331]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP331]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP331]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP331]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP331]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP332:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP178:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -5497,18 +6176,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -5518,83 +6200,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -5606,63 +6298,74 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP334:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP334]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP334]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP334]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP334]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP334]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP335:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP181:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -5675,18 +6378,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -5696,83 +6402,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -5784,56 +6500,67 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -5845,18 +6572,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -5866,83 +6596,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -5954,65 +6694,76 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -6024,18 +6775,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -6045,83 +6799,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -6133,56 +6897,67 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CONV2:%.*]] = sext i32 [[TMP5]] to i64
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -6194,18 +6969,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -6215,83 +6993,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -6303,63 +7091,74 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP337:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP337]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP337]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP337]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP337]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP337]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP338:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP184:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -6372,18 +7171,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -6393,83 +7195,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -6481,63 +7293,74 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP340:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP340]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP340]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP340]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP340]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP340]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP341:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP187:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -6550,18 +7373,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -6571,83 +7397,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -6659,63 +7495,74 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP343:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP343]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP343]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP343]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP343]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP343]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP344:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP190:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -6728,18 +7575,21 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -6749,83 +7599,93 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP7]] to ptr
// CHECK-64-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 8
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP9]] to ptr
// CHECK-64-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-64: cond.true5:
// CHECK-64-NEXT: br label [[COND_END7:%.*]]
// CHECK-64: cond.false6:
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END7]]
// CHECK-64: cond.end7:
// CHECK-64-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -6837,63 +7697,74 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP346:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP346]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP346]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP346]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP346]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP346]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP347:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP193:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -6906,20 +7777,23 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8:[0-9]+]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-64-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[LOADEDV]] to i32
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -6929,74 +7803,83 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
@@ -7006,15 +7889,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -7024,55 +7909,64 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -7084,15 +7978,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -7102,74 +7998,83 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
@@ -7179,15 +8084,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -7197,51 +8104,60 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP349:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP349]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP349]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP349]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP349]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP349]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP350:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP196:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -7254,15 +8170,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -7272,51 +8190,60 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP352:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP352]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP352]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP352]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP352]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP352]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP353:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP199:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -7329,15 +8256,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -7347,51 +8276,60 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP355:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP355]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP355]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP355]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP355]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP355]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP356:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP202:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -7404,15 +8342,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -7422,51 +8362,60 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP358:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP358]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP358]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP358]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP358]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP358]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP359:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP205:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -7479,20 +8428,23 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-64-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-64-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-64-NEXT: [[TMP3:%.*]] = zext i1 [[LOADEDV]] to i32
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -7502,82 +8454,91 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP361:![0-9]+]]
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP361]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207]]
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP361]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP361]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP361]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP361]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP362:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP208:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
// CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP1]])
@@ -7587,15 +8548,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -7605,65 +8568,74 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP364:![0-9]+]]
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP364]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP364]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP364]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP364]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP364]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP365:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP211:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
// CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -7673,15 +8645,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -7691,82 +8665,91 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP367:![0-9]+]]
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP367]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213]]
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP367]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP367]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP367]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP367]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP368:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP214:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
// CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -7776,15 +8759,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -7794,62 +8779,71 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP370:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP370]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP370]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP370]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP370]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP370]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP371:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP217:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -7859,15 +8853,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -7877,62 +8873,71 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP373:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP373]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP373]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP373]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP373]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP373]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP374:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP220:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -7942,15 +8947,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -7960,62 +8967,71 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP376:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP376]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP376]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP376]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP376]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP376]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP377:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP223:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -8025,15 +9041,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -8043,62 +9061,71 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP379:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP379]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP379]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP379]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP379]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP379]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP380:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP226:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -8108,15 +9135,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10:[0-9]+]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -8126,63 +9155,72 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 65, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP382:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP382]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP382]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP382]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP382]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP382]]
-// CHECK-64-NEXT: call void @__kmpc_dispatch_fini_4(ptr @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP382]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP383:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228]]
+// CHECK-64-NEXT: call void @__kmpc_dispatch_fini_4(ptr @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP228]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP229:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -8192,15 +9230,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -8210,65 +9250,74 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP385:![0-9]+]]
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP385]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231]]
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP385]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP385]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP385]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP385]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP386:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP232:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-64: omp.loop.exit:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
// CHECK-64-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -8278,15 +9327,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -8296,82 +9347,91 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP388:![0-9]+]]
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP388]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234]]
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP388]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP388]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP388]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP388]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP389:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP235:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
// CHECK-64-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -8381,15 +9441,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -8399,62 +9461,71 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP391:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP391]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP391]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP391]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP391]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP391]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP392:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP238:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -8464,15 +9535,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -8482,62 +9555,71 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP394:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP394]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP394]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP394]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP394]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP394]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP395:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP241:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -8547,15 +9629,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -8565,62 +9649,71 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP397:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP397]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP397]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP397]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP397]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP397]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP398:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP244:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -8630,15 +9723,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -8648,62 +9743,71 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP400:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP400]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP400]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP400]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP400]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP400]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP401:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP247:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-64-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -8713,15 +9817,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -8731,74 +9837,83 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
@@ -8808,15 +9923,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -8826,55 +9943,64 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -8886,15 +10012,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -8904,74 +10032,83 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-64-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
@@ -8981,15 +10118,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -8999,51 +10138,60 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP403:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP403]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP403]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP403]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP403]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP403]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP404:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP250:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -9056,15 +10204,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -9074,51 +10224,60 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP406:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP406]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP406]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP406]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP406]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP406]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP407:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP253:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -9131,15 +10290,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -9149,51 +10310,60 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP409:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP409]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP409]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP409]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP409]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP409]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP410:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP256:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -9206,15 +10376,17 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -9224,51 +10396,60 @@ int a;
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-64-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-64-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP412:![0-9]+]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP412]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP412]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP412]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP412]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP412]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP413:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP259:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
@@ -9281,26 +10462,31 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-32-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK-32-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]]
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-32-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-32-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 1
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -9310,170 +10496,184 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED15:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS17:%.*]] = alloca [3 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED15:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS17:%.*]] = alloca [3 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__CASTED15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED15]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS17]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK-32-NEXT: br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK-32: omp_if.then:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP221:![0-9]+]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1
-// CHECK-32-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8
-// CHECK-32-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[LOADEDV2:%.*]] = trunc i8 [[TMP9]] to i1
+// CHECK-32-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV2]] to i8
+// CHECK-32-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 1, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK-32-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP10]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP17]] to i1
-// CHECK-32-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL3]] to i32
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3), !llvm.access.group [[ACC_GRP221]]
+// CHECK-32-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[LOADEDV3:%.*]] = trunc i8 [[TMP17]] to i1
+// CHECK-32-NEXT: [[TMP18:%.*]] = zext i1 [[LOADEDV3]] to i32
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 3), !llvm.access.group [[ACC_GRP67]]
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
-// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
-// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-32-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP25]], 9
// CHECK-32-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]]
// CHECK-32: cond.true7:
// CHECK-32-NEXT: br label [[COND_END9:%.*]]
// CHECK-32: cond.false8:
-// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-32-NEXT: br label [[COND_END9]]
// CHECK-32: cond.end9:
// CHECK-32-NEXT: [[COND10:%.*]] = phi i32 [ 9, [[COND_TRUE7]] ], [ [[TMP26]], [[COND_FALSE8]] ]
-// CHECK-32-NEXT: store i32 [[COND10]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP222:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[COND10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK-32: omp_if.else:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND11:%.*]]
// CHECK-32: omp.inner.for.cond11:
-// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP12:%.*]] = icmp slt i32 [[TMP28]], 10
// CHECK-32-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY13:%.*]], label [[OMP_INNER_FOR_END28:%.*]]
// CHECK-32: omp.inner.for.body13:
-// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP31:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP31]] to i1
-// CHECK-32-NEXT: [[FROMBOOL16:%.*]] = zext i1 [[TOBOOL14]] to i8
-// CHECK-32-NEXT: store i8 [[FROMBOOL16]], ptr [[DOTCAPTURE_EXPR__CASTED15]], align 1
-// CHECK-32-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED15]], align 4
-// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP31:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-NEXT: [[LOADEDV14:%.*]] = trunc i8 [[TMP31]] to i1
+// CHECK-32-NEXT: [[STOREDV16:%.*]] = zext i1 [[LOADEDV14]] to i8
+// CHECK-32-NEXT: store i8 [[STOREDV16]], ptr [[DOTCAPTURE_EXPR__CASTED15_ASCAST]], align 1
+// CHECK-32-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED15_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP34:%.*]] = inttoptr i32 [[TMP29]] to ptr
// CHECK-32-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 4
-// CHECK-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP36:%.*]] = inttoptr i32 [[TMP30]] to ptr
// CHECK-32-NEXT: store ptr [[TMP36]], ptr [[TMP35]], align 4
-// CHECK-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17]], i32 0, i32 2
+// CHECK-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17_ASCAST]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP38:%.*]] = inttoptr i32 [[TMP32]] to ptr
// CHECK-32-NEXT: store ptr [[TMP38]], ptr [[TMP37]], align 4
-// CHECK-32-NEXT: [[TMP39:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-NEXT: [[TOBOOL18:%.*]] = trunc i8 [[TMP39]] to i1
-// CHECK-32-NEXT: [[TMP40:%.*]] = zext i1 [[TOBOOL18]] to i32
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP40]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1, ptr null, ptr [[CAPTURED_VARS_ADDRS17]], i32 3)
+// CHECK-32-NEXT: [[TMP39:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-NEXT: [[LOADEDV18:%.*]] = trunc i8 [[TMP39]] to i1
+// CHECK-32-NEXT: [[TMP40:%.*]] = zext i1 [[LOADEDV18]] to i32
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP40]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1, ptr null, ptr [[CAPTURED_VARS_ADDRS17_ASCAST]], i32 3)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]]
// CHECK-32: omp.inner.for.inc19:
-// CHECK-32-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP41]], [[TMP42]]
-// CHECK-32-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP43]], [[TMP44]]
-// CHECK-32-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP45]], [[TMP46]]
-// CHECK-32-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP23:%.*]] = icmp sgt i32 [[TMP47]], 9
// CHECK-32-NEXT: br i1 [[CMP23]], label [[COND_TRUE24:%.*]], label [[COND_FALSE25:%.*]]
// CHECK-32: cond.true24:
// CHECK-32-NEXT: br label [[COND_END26:%.*]]
// CHECK-32: cond.false25:
-// CHECK-32-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END26]]
// CHECK-32: cond.end26:
// CHECK-32-NEXT: [[COND27:%.*]] = phi i32 [ 9, [[COND_TRUE24]] ], [ [[TMP48]], [[COND_FALSE25]] ]
-// CHECK-32-NEXT: store i32 [[COND27]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP49]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND11]], !llvm.loop [[LOOP225:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[COND27]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP49]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND11]], !llvm.loop [[LOOP71:![0-9]+]]
// CHECK-32: omp.inner.for.end28:
// CHECK-32-NEXT: br label [[OMP_IF_END]]
// CHECK-32: omp_if.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
// CHECK-32-NEXT: br i1 [[TMP51]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -9482,101 +10682,113 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-32-NEXT: br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK-32: omp_if.then:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP227:![0-9]+]]
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP227]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP227]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP227]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP227]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP227]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP227]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP228:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK-32: omp_if.else:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]]
// CHECK-32: omp.inner.for.cond2:
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]]
// CHECK-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]]
// CHECK-32: omp.inner.for.body4:
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
-// CHECK-32-NEXT: store i32 [[ADD6]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD6]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]]
// CHECK-32: omp.body.continue7:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]]
// CHECK-32: omp.inner.for.inc8:
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP230:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP76:![0-9]+]]
// CHECK-32: omp.inner.for.end10:
// CHECK-32-NEXT: br label [[OMP_IF_END]]
// CHECK-32: omp_if.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -9585,101 +10797,113 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-32-NEXT: br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK-32: omp_if.then:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]]
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP231]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77:![0-9]+]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP231]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP231]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP232:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP78:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK-32: omp_if.else:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]]
// CHECK-32: omp.inner.for.cond2:
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]]
// CHECK-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]]
// CHECK-32: omp.inner.for.body4:
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
-// CHECK-32-NEXT: store i32 [[ADD6]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD6]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]]
// CHECK-32: omp.body.continue7:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]]
// CHECK-32: omp.inner.for.inc8:
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP234:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP80:![0-9]+]]
// CHECK-32: omp.inner.for.end10:
// CHECK-32-NEXT: br label [[OMP_IF_END]]
// CHECK-32: omp_if.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -9688,18 +10912,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -9709,91 +10936,101 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP235:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81:![0-9]+]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP235]]
+// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP81]]
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP236:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP82:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -9802,73 +11039,84 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP238:![0-9]+]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP238]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84:![0-9]+]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP238]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP238]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP238]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP238]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP239:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP85:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK-32-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -9877,18 +11125,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -9898,91 +11149,101 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP241:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87:![0-9]+]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP241]]
+// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP87]]
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP242:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP88:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -9991,63 +11252,74 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP244:![0-9]+]]
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP244]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90:![0-9]+]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP244]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP244]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP244]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP244]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP244]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP245:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP91:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
// CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -10056,18 +11328,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -10077,91 +11352,101 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP247:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93:![0-9]+]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP247]]
+// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP93]]
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP248:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP94:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -10170,72 +11455,83 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP250:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP250]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP250]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP250]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP250]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP250]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP251:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP97:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -10244,18 +11540,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -10265,91 +11564,101 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP253:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99:![0-9]+]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP253]]
+// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP99]]
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP254:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP100:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -10358,72 +11667,83 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP256:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP256]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP256]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP256]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP256]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP256]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP257:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP103:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -10432,18 +11752,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -10453,91 +11776,101 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP259:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105:![0-9]+]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP259]]
+// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP105]]
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP260:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP106:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -10546,72 +11879,83 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP262:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP262]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP262]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP262]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP262]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP262]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP263:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP109:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -10620,18 +11964,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -10641,91 +11988,101 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP265:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111:![0-9]+]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP265]]
+// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP111]]
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP266:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP112:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -10734,72 +12091,83 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP268:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP268]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP268]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP268]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP268]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP268]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP269:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP115:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -10808,24 +12176,29 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[A_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP3]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -10835,102 +12208,114 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[A1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK-32-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: store i32 [[TMP8]], ptr [[A_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP9]] to ptr
// CHECK-32-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 3)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP22]], 9
// CHECK-32-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]]
// CHECK-32: cond.true6:
// CHECK-32-NEXT: br label [[COND_END8:%.*]]
// CHECK-32: cond.false7:
-// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END8]]
// CHECK-32: cond.end8:
// CHECK-32-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP23]], [[COND_FALSE7]] ]
-// CHECK-32-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
// CHECK-32-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK-32: .omp.lastprivate.then:
// CHECK-32-NEXT: [[TMP27:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK-32-NEXT: store i32 [[TMP27]], ptr [[A_ADDR]], align 4
+// CHECK-32-NEXT: store i32 [[TMP27]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK-32: .omp.lastprivate.done:
// CHECK-32-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i32 4)
@@ -10940,69 +12325,82 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[A1:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[A1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[A1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A1]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4
-// CHECK-32-NEXT: store i32 [[TMP8]], ptr [[A1]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP8]], ptr [[A1_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK-32-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK-32: .omp.lastprivate.then:
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK-32-NEXT: store i32 [[TMP13]], ptr [[A_ADDR]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP13]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK-32: .omp.lastprivate.done:
// CHECK-32-NEXT: ret void
@@ -11011,18 +12409,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -11032,81 +12433,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -11118,63 +12529,74 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -11186,18 +12608,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -11207,81 +12632,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -11293,53 +12728,64 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -11351,18 +12797,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -11372,81 +12821,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -11458,61 +12917,72 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP271:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP271]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP271]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP271]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP271]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP271]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP272:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP118:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -11525,18 +12995,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -11546,81 +13019,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -11632,61 +13115,72 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP274:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP274]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP274]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP274]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP274]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP274]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP275:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP121:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -11699,18 +13193,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -11720,81 +13217,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -11806,61 +13313,72 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP277:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP277]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP277]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP277]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP277]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP277]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP278:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP124:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -11873,18 +13391,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -11894,81 +13415,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -11980,61 +13511,72 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP280:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP280]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP280]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP280]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP280]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP280]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP281:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP127:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -12047,18 +13589,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -12068,92 +13613,103 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[B:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[B:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP283:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129:![0-9]+]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP283]]
+// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP129]]
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP284:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP130:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -12162,63 +13718,74 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP286:![0-9]+]]
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP286]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132:![0-9]+]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP286]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP286]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP286]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP286]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP286]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP287:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP133:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
// CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -12227,18 +13794,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -12248,93 +13818,104 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[B:%.*]] = alloca [3 x i32], align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 @"__const.<captured>.b", i32 12, i1 false)
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[B:%.*]] = alloca [3 x i32], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B_ASCAST]], ptr align 4 @"__const.<captured>.b", i32 12, i1 false)
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP289:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135:![0-9]+]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP289]]
+// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP135]]
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP290:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP136:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -12343,73 +13924,84 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP292:![0-9]+]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP292]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138:![0-9]+]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP292]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP292]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP292]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP292]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP293:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP139:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK-32-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -12418,18 +14010,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -12439,72 +14034,82 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK-32-NEXT: [[B:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP295:![0-9]+]]
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP295]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141:![0-9]+]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP295]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP295]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP295]]
-// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP141]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP295]]
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP295]]
+// CHECK-32-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP141]]
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP141]]
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP295]]
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP295]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP295]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP296:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP142:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
// CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: store ptr [[B]], ptr [[C]], align 4
@@ -12516,63 +14121,74 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP298:![0-9]+]]
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP298]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP298]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP298]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP298]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP298]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP298]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP299:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP145:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
// CHECK-32-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -12581,38 +14197,45 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined_wrapper
// CHECK-32-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
+// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-32-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK-32-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 1
// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], i32 [[TMP6]]) #[[ATTR2]]
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], i32 [[TMP6]]) #[[ATTR2]]
// CHECK-32-NEXT: ret void
//
//
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -12622,91 +14245,101 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP301:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP301]]
+// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP147]]
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP302:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP148:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -12715,72 +14348,83 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP304:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP304]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP304]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP304]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP304]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP304]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP305:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP151:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -12789,18 +14433,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -12810,91 +14457,101 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP307:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP307]]
+// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP153]]
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP308:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP154:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -12903,72 +14560,83 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP310:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP310]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP310]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP310]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP310]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP310]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP311:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP157:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -12977,18 +14645,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -12998,91 +14669,101 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP313:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP313]]
+// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP159]]
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP314:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP160:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -13091,72 +14772,83 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP316:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP316]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP316]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP316]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP316]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP316]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP317:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP163:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -13165,18 +14857,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -13186,91 +14881,101 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP319:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP319]]
+// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP165]]
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP320:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP166:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -13279,72 +14984,83 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP322:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP322]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP322]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP322]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP322]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP322]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP323:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP169:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -13353,18 +15069,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -13374,81 +15093,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -13460,53 +15189,64 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -13518,18 +15258,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -13539,81 +15282,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -13625,63 +15378,74 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -13693,18 +15457,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -13714,81 +15481,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -13800,53 +15577,64 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -13858,18 +15646,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -13879,81 +15670,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -13965,61 +15766,72 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP325:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP325]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP325]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP325]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP325]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP325]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP326:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP172:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -14032,18 +15844,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -14053,81 +15868,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -14139,61 +15964,72 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP328:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP328]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP328]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP328]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP328]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP328]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP329:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP175:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -14206,18 +16042,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -14227,81 +16066,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -14313,61 +16162,72 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP331:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP331]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP331]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP331]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP331]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP331]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP332:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP178:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -14380,18 +16240,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -14401,81 +16264,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -14487,61 +16360,72 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP334:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP334]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP334]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP334]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP334]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP334]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP335:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP181:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -14554,18 +16438,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -14575,81 +16462,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -14661,53 +16558,64 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -14719,18 +16627,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -14740,81 +16651,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -14826,63 +16747,74 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -14894,18 +16826,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -14915,81 +16850,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -15001,53 +16946,64 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -15059,18 +17015,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -15080,81 +17039,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -15166,61 +17135,72 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP337:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP337]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP337]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP337]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP337]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP337]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP338:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP184:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -15233,18 +17213,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -15254,81 +17237,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -15340,61 +17333,72 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP340:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP340]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP340]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP340]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP340]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP340]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP341:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP187:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -15407,18 +17411,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -15428,81 +17435,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -15514,61 +17531,72 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP343:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP343]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP343]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP343]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP343]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP343]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP344:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP190:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -15581,18 +17609,21 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -15602,81 +17633,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32: cond.true5:
// CHECK-32-NEXT: br label [[COND_END7:%.*]]
// CHECK-32: cond.false6:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END7]]
// CHECK-32: cond.end7:
// CHECK-32-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -15688,61 +17729,72 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP346:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP346]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP346]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP346]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP346]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP346]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP347:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP193:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -15755,20 +17807,23 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8:[0-9]+]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[LOADEDV]] to i32
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -15778,74 +17833,83 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
@@ -15855,15 +17919,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -15873,55 +17939,64 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -15933,15 +18008,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -15951,74 +18028,83 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
@@ -16028,15 +18114,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -16046,51 +18134,60 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP349:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP349]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP349]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP349]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP349]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP349]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP350:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP196:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -16103,15 +18200,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -16121,51 +18220,60 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP352:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP352]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP352]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP352]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP352]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP352]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP353:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP199:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -16178,15 +18286,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -16196,51 +18306,60 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP355:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP355]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP355]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP355]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP355]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP355]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP356:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP202:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -16253,15 +18372,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -16271,51 +18392,60 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP358:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP358]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP358]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP358]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP358]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP358]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP359:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP205:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -16328,20 +18458,23 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-32-NEXT: [[TMP3:%.*]] = zext i1 [[LOADEDV]] to i32
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -16351,82 +18484,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP361:![0-9]+]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP361]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207]]
// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP361]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP361]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP361]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP361]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP362:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP208:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
// CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP1]])
@@ -16436,15 +18578,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -16454,65 +18598,74 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP364:![0-9]+]]
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP364]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP364]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP364]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP364]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP364]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP365:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP211:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -16522,15 +18675,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -16540,82 +18695,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP367:![0-9]+]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP367]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213]]
// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP367]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP367]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP367]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP367]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP368:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP214:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
// CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -16625,15 +18789,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -16643,62 +18809,71 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP370:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP370]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP370]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP370]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP370]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP370]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP371:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP217:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -16708,15 +18883,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -16726,62 +18903,71 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP373:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP373]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP373]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP373]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP373]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP373]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP374:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP220:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -16791,15 +18977,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -16809,62 +18997,71 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP376:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP376]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP376]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP376]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP376]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP376]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP377:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP223:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -16874,15 +19071,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -16892,62 +19091,71 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP379:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP379]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP379]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP379]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP379]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP379]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP380:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP226:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -16957,15 +19165,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10:[0-9]+]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -16975,63 +19185,72 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 65, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP382:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP382]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP382]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP382]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP382]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP382]]
-// CHECK-32-NEXT: call void @__kmpc_dispatch_fini_4(ptr @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP382]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP383:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228]]
+// CHECK-32-NEXT: call void @__kmpc_dispatch_fini_4(ptr @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP228]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP229:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -17041,15 +19260,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -17059,65 +19280,74 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP385:![0-9]+]]
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP385]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231]]
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP385]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP385]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP385]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP385]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP386:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP232:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32: omp.loop.exit:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
// CHECK-32-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -17127,15 +19357,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -17145,82 +19377,91 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP388:![0-9]+]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP388]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234]]
// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP388]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP388]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP388]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP388]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP389:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP235:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
// CHECK-32-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -17230,15 +19471,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -17248,62 +19491,71 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP391:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP391]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP391]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP391]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP391]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP391]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP392:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP238:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -17313,15 +19565,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -17331,62 +19585,71 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP394:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP394]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP394]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP394]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP394]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP394]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP395:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP241:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -17396,15 +19659,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -17414,62 +19679,71 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP397:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP397]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP397]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP397]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP397]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP397]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP398:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP244:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -17479,15 +19753,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -17497,62 +19773,71 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP400:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP400]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP400]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP400]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP400]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP400]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP401:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP247:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -17562,15 +19847,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -17580,74 +19867,83 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
@@ -17657,15 +19953,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -17675,55 +19973,64 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -17735,15 +20042,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -17753,74 +20062,83 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-32-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
@@ -17830,15 +20148,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -17848,51 +20168,60 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP403:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP403]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP403]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP403]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP403]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP403]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP404:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP250:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -17905,15 +20234,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -17923,51 +20254,60 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP406:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP406]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP406]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP406]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP406]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP406]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP407:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP253:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -17980,15 +20320,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -17998,51 +20340,60 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP409:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP409]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP409]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP409]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP409]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP409]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP410:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP256:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -18055,15 +20406,17 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -18073,51 +20426,60 @@ int a;
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP412:![0-9]+]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP412]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP412]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP412]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP412]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP412]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP413:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP259:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
@@ -18130,26 +20492,31 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-32-EX-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK-32-EX-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-EX-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-32-EX-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK-32-EX-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 1
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -18159,170 +20526,184 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED15:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS17:%.*]] = alloca [3 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED15:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS17:%.*]] = alloca [3 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__CASTED15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED15]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS17_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS17]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-EX-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK-32-EX-NEXT: br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK-32-EX: omp_if.then:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP221:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67:![0-9]+]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP9]] to i1
-// CHECK-32-EX-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8
-// CHECK-32-EX-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[LOADEDV2:%.*]] = trunc i8 [[TMP9]] to i1
+// CHECK-32-EX-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV2]] to i8
+// CHECK-32-EX-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 1, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK-32-EX-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK-32-EX-NEXT: [[TMP16:%.*]] = inttoptr i32 [[TMP10]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP17]] to i1
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL3]] to i32
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3), !llvm.access.group [[ACC_GRP221]]
+// CHECK-32-EX-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[LOADEDV3:%.*]] = trunc i8 [[TMP17]] to i1
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = zext i1 [[LOADEDV3]] to i32
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 3), !llvm.access.group [[ACC_GRP67]]
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
-// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
-// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-32-EX-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP25]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]]
// CHECK-32-EX: cond.true7:
// CHECK-32-EX-NEXT: br label [[COND_END9:%.*]]
// CHECK-32-EX: cond.false8:
-// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
+// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
// CHECK-32-EX-NEXT: br label [[COND_END9]]
// CHECK-32-EX: cond.end9:
// CHECK-32-EX-NEXT: [[COND10:%.*]] = phi i32 [ 9, [[COND_TRUE7]] ], [ [[TMP26]], [[COND_FALSE8]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND10]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP221]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP222:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[COND10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP67]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK-32-EX: omp_if.else:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND11:%.*]]
// CHECK-32-EX: omp.inner.for.cond11:
-// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP12:%.*]] = icmp slt i32 [[TMP28]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY13:%.*]], label [[OMP_INNER_FOR_END28:%.*]]
// CHECK-32-EX: omp.inner.for.body13:
-// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-EX-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP31]] to i1
-// CHECK-32-EX-NEXT: [[FROMBOOL16:%.*]] = zext i1 [[TOBOOL14]] to i8
-// CHECK-32-EX-NEXT: store i8 [[FROMBOOL16]], ptr [[DOTCAPTURE_EXPR__CASTED15]], align 1
-// CHECK-32-EX-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED15]], align 4
-// CHECK-32-EX-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-EX-NEXT: [[LOADEDV14:%.*]] = trunc i8 [[TMP31]] to i1
+// CHECK-32-EX-NEXT: [[STOREDV16:%.*]] = zext i1 [[LOADEDV14]] to i8
+// CHECK-32-EX-NEXT: store i8 [[STOREDV16]], ptr [[DOTCAPTURE_EXPR__CASTED15_ASCAST]], align 1
+// CHECK-32-EX-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED15_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP34:%.*]] = inttoptr i32 [[TMP29]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP34]], ptr [[TMP33]], align 4
-// CHECK-32-EX-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP35:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP36:%.*]] = inttoptr i32 [[TMP30]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP36]], ptr [[TMP35]], align 4
-// CHECK-32-EX-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17]], i32 0, i32 2
+// CHECK-32-EX-NEXT: [[TMP37:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS17_ASCAST]], i32 0, i32 2
// CHECK-32-EX-NEXT: [[TMP38:%.*]] = inttoptr i32 [[TMP32]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP38]], ptr [[TMP37]], align 4
-// CHECK-32-EX-NEXT: [[TMP39:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-EX-NEXT: [[TOBOOL18:%.*]] = trunc i8 [[TMP39]] to i1
-// CHECK-32-EX-NEXT: [[TMP40:%.*]] = zext i1 [[TOBOOL18]] to i32
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP40]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1, ptr null, ptr [[CAPTURED_VARS_ADDRS17]], i32 3)
+// CHECK-32-EX-NEXT: [[TMP39:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-EX-NEXT: [[LOADEDV18:%.*]] = trunc i8 [[TMP39]] to i1
+// CHECK-32-EX-NEXT: [[TMP40:%.*]] = zext i1 [[LOADEDV18]] to i32
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP40]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1, ptr null, ptr [[CAPTURED_VARS_ADDRS17_ASCAST]], i32 3)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC19:%.*]]
// CHECK-32-EX: omp.inner.for.inc19:
-// CHECK-32-EX-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP41]], [[TMP42]]
-// CHECK-32-EX-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP43]], [[TMP44]]
-// CHECK-32-EX-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP45]], [[TMP46]]
-// CHECK-32-EX-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD22]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP23:%.*]] = icmp sgt i32 [[TMP47]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP23]], label [[COND_TRUE24:%.*]], label [[COND_FALSE25:%.*]]
// CHECK-32-EX: cond.true24:
// CHECK-32-EX-NEXT: br label [[COND_END26:%.*]]
// CHECK-32-EX: cond.false25:
-// CHECK-32-EX-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END26]]
// CHECK-32-EX: cond.end26:
// CHECK-32-EX-NEXT: [[COND27:%.*]] = phi i32 [ 9, [[COND_TRUE24]] ], [ [[TMP48]], [[COND_FALSE25]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND27]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP49]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND11]], !llvm.loop [[LOOP225:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[COND27]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP49]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND11]], !llvm.loop [[LOOP71:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end28:
// CHECK-32-EX-NEXT: br label [[OMP_IF_END]]
// CHECK-32-EX: omp_if.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP50:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP51]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -18331,101 +20712,113 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-EX-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-32-EX-NEXT: br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK-32-EX: omp_if.then:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP227:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP227]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP227]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP227]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP227]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP227]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP227]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP228:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP73]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK-32-EX: omp_if.else:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]]
// CHECK-32-EX: omp.inner.for.cond2:
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]]
// CHECK-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]]
// CHECK-32-EX: omp.inner.for.body4:
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
-// CHECK-32-EX-NEXT: store i32 [[ADD6]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD6]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]]
// CHECK-32-EX: omp.body.continue7:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]]
// CHECK-32-EX: omp.inner.for.inc8:
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP230:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP76:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end10:
// CHECK-32-EX-NEXT: br label [[OMP_IF_END]]
// CHECK-32-EX: omp_if.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -18434,101 +20827,113 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15_omp_outlined_omp_outlined1
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-EX-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-32-EX-NEXT: br i1 [[LOADEDV]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]]
// CHECK-32-EX: omp_if.then:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP231]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP231]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP231]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP231]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP232:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP77]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP78:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_IF_END:%.*]]
// CHECK-32-EX: omp_if.else:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2:%.*]]
// CHECK-32-EX: omp.inner.for.cond2:
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]]
// CHECK-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY4:%.*]], label [[OMP_INNER_FOR_END10:%.*]]
// CHECK-32-EX: omp.inner.for.body4:
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]]
-// CHECK-32-EX-NEXT: store i32 [[ADD6]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD6]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE7:%.*]]
// CHECK-32-EX: omp.body.continue7:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC8:%.*]]
// CHECK-32-EX: omp.inner.for.inc8:
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP234:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND2]], !llvm.loop [[LOOP80:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end10:
// CHECK-32-EX-NEXT: br label [[OMP_IF_END]]
// CHECK-32-EX: omp_if.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -18537,18 +20942,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -18558,91 +20966,101 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP235:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81:![0-9]+]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP235]]
+// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP81]]
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP235]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP236:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP81]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP82:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -18651,73 +21069,84 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l18_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP238:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP238]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP238]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP238]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP238]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP238]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP239:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP84]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP85:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -18726,18 +21155,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -18747,91 +21179,101 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP241:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87:![0-9]+]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP241]]
+// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP87]]
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP241]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP242:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP87]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP88:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -18840,63 +21282,74 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l21_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP244:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP244]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP244]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP244]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP244]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP244]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP244]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP245:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP90]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP91:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -18905,18 +21358,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -18926,91 +21382,101 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP247:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93:![0-9]+]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP247]]
+// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP93]]
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP247]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP248:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP93]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP94:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -19019,72 +21485,83 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l24_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP250:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP250]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP250]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP250]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP250]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP250]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP251:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP96]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP97:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -19093,18 +21570,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -19114,91 +21594,101 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP253:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99:![0-9]+]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP253]]
+// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP99]]
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP253]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP254:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP99]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP100:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -19207,72 +21697,83 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l27_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP256:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP256]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP256]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP256]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP256]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP256]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP257:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP102]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP103:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -19281,18 +21782,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -19302,91 +21806,101 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP259:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105:![0-9]+]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP259]]
+// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP105]]
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP259]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP260:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP105]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP106:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -19395,72 +21909,83 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l30_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP262:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP262]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP262]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP262]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP262]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP262]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP263:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP108]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP109:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -19469,18 +21994,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -19490,91 +22018,101 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP265:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111:![0-9]+]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP265]]
+// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP111]]
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP265]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP266:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP111]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP112:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -19583,72 +22121,83 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l33_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP268:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP268]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP268]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP268]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP268]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP268]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP269:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP114]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP115:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -19657,24 +22206,29 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[A_CASTED]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[A_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP3]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -19684,102 +22238,114 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[A1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: store i32 [[TMP8]], ptr [[A_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP13:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP13]], ptr [[TMP12]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK-32-EX-NEXT: [[TMP15:%.*]] = inttoptr i32 [[TMP9]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 3)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP22]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]]
// CHECK-32-EX: cond.true6:
// CHECK-32-EX-NEXT: br label [[COND_END8:%.*]]
// CHECK-32-EX: cond.false7:
-// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END8]]
// CHECK-32-EX: cond.end8:
// CHECK-32-EX-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP23]], [[COND_FALSE7]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK-32-EX: .omp.lastprivate.then:
// CHECK-32-EX-NEXT: [[TMP27:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP27]], ptr [[A_ADDR]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP27]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK-32-EX: .omp.lastprivate.done:
// CHECK-32-EX-NEXT: call void @__kmpc_free_shared(ptr [[A1]], i32 4)
@@ -19789,69 +22355,82 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l37_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[A1:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[A1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[A1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A1]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP8]], ptr [[A1]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP8]], ptr [[A1_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], [[TMP10]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP12]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK-32-EX: .omp.lastprivate.then:
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[A1]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP13]], ptr [[A_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP13]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK-32-EX: .omp.lastprivate.done:
// CHECK-32-EX-NEXT: ret void
@@ -19860,18 +22439,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -19881,81 +22463,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -19967,63 +22559,74 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l40_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -20035,18 +22638,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -20056,81 +22662,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -20142,53 +22758,64 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l43_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -20200,18 +22827,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -20221,81 +22851,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -20307,61 +22947,72 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l46_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP271:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP271]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP271]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP271]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP271]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP271]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP272:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP117]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP118:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -20374,18 +23025,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -20395,81 +23049,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -20481,61 +23145,72 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l49_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP274:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP274]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP274]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP274]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP274]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP274]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP275:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP120]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP121:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -20548,18 +23223,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -20569,81 +23247,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -20655,61 +23343,72 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l52_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP277:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP277]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP277]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP277]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP277]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP277]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP278:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP123]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP124:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -20722,18 +23421,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -20743,81 +23445,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -20829,61 +23541,72 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l55_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP280:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP280]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP280]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP280]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP280]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP280]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP281:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP126]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP127:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -20896,18 +23619,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -20917,92 +23643,103 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[B:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[B:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP283:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129:![0-9]+]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP283]]
+// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP129]]
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP283]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP284:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP129]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP130:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -21011,63 +23748,74 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l58_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP286:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP286]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP286]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP286]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP286]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP286]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP286]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP287:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP132]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP133:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -21076,18 +23824,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -21097,93 +23848,104 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[B:%.*]] = alloca [3 x i32], align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 @"__const.<captured>.b", i32 12, i1 false)
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[B:%.*]] = alloca [3 x i32], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B_ASCAST]], ptr align 4 @"__const.<captured>.b", i32 12, i1 false)
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP289:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135:![0-9]+]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP289]]
+// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP135]]
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP289]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP290:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP135]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP136:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -21192,73 +23954,84 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l66_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP292:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP292]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP292]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP292]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP292]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP292]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP293:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP138]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP139:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP12]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -21267,18 +24040,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -21288,72 +24064,82 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[C:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK-32-EX-NEXT: [[B:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP295:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP295]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP295]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP295]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP295]]
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP141]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP295]]
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP295]]
+// CHECK-32-EX-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP141]]
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP141]]
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP295]]
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP295]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP295]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP296:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP141]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP142:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[C]], align 4
@@ -21365,63 +24151,74 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP298:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP298]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP298]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP298]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP298]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP298]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP298]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP299:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP144]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP145:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP3]])
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -21430,38 +24227,45 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined_wrapper
// CHECK-32-EX-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
+// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK-32-EX-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 1
// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], i32 [[TMP6]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l73_omp_outlined_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], i32 [[TMP6]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: ret void
//
//
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -21471,91 +24275,101 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP301:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147:![0-9]+]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP301]]
+// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP147]]
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP301]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP302:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP147]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP148:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -21564,72 +24378,83 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l81_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP304:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP304]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP304]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP304]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP304]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP304]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP305:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP150]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP151:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -21638,18 +24463,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -21659,91 +24487,101 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP307:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153:![0-9]+]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP307]]
+// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP153]]
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP307]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP308:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP153]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP154:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -21752,72 +24590,83 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l85_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP310:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP310]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP310]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP310]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP310]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP310]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP311:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP156]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP157:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -21826,18 +24675,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -21847,91 +24699,101 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP313:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159:![0-9]+]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP313]]
+// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP159]]
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP313]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP314:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP159]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP160:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -21940,72 +24802,83 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l89_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP316:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP316]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP316]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP316]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP316]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP316]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP317:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP162]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP163:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -22014,18 +24887,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -22035,91 +24911,101 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP319:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165:![0-9]+]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2), !llvm.access.group [[ACC_GRP319]]
+// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2), !llvm.access.group [[ACC_GRP165]]
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP319]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP320:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP165]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP166:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP22]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -22128,72 +25014,83 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l93_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP322:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP322]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP322]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP322]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP322]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP322]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP323:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP168]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP169:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP5]])
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP13]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -22202,18 +25099,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -22223,81 +25123,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -22309,53 +25219,64 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l97_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -22367,18 +25288,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -22388,81 +25312,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -22474,63 +25408,74 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l101_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -22542,18 +25487,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -22563,81 +25511,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -22649,53 +25607,64 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l105_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -22707,18 +25676,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -22728,81 +25700,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -22814,61 +25796,72 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l109_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP325:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP325]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP325]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP325]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP325]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP325]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP326:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP171]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP172:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -22881,18 +25874,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -22902,81 +25898,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -22988,61 +25994,72 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l113_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP328:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP328]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP328]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP328]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP328]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP328]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP329:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP174]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP175:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -23055,18 +26072,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -23076,81 +26096,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -23162,61 +26192,72 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l117_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP331:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP331]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP331]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP331]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP331]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP331]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP332:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP177]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP178:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -23229,18 +26270,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -23250,81 +26294,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -23336,61 +26390,72 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l121_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP334:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP334]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP334]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP334]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP334]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP334]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP335:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP180]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP181:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -23403,18 +26468,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -23424,81 +26492,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -23510,53 +26588,64 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l125_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -23568,18 +26657,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -23589,81 +26681,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -23675,63 +26777,74 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l130_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -23743,18 +26856,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -23764,81 +26880,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -23850,53 +26976,64 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l135_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -23908,18 +27045,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -23929,81 +27069,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -24015,61 +27165,72 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l140_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741862, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP337:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP337]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP337]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP337]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP337]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP337]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP338:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP183]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP184:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -24082,18 +27243,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -24103,81 +27267,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -24189,61 +27363,72 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l145_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741861, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP340:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP340]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP340]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP340]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP340]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP340]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP341:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP186]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP187:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -24256,18 +27441,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -24277,81 +27465,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -24363,61 +27561,72 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l150_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741859, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP343:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP343]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP343]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP343]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP343]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP343]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP344:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP189]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP190:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -24430,18 +27639,21 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -24451,81 +27663,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP5]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP9]], ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP11]], ptr [[TMP10]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK-32-EX: cond.true5:
// CHECK-32-EX-NEXT: br label [[COND_END7:%.*]]
// CHECK-32-EX: cond.false6:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END7]]
// CHECK-32-EX: cond.end7:
// CHECK-32-EX-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -24537,61 +27759,72 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l155_omp_outlined_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP0]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP5]], i32 1073741860, i32 [[TMP2]], i32 [[TMP3]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP5]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP346:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP346]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP346]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP346]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP346]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP11]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP346]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP347:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP192]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP193:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -24604,20 +27837,23 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-EX-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[LOADEDV]] to i32
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -24627,74 +27863,83 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l160_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
@@ -24704,15 +27949,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -24722,55 +27969,64 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l163_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -24782,15 +28038,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -24800,74 +28058,83 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l166_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
@@ -24877,15 +28144,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -24895,51 +28164,60 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l169_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP349:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP349]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP349]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP349]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP349]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP349]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP350:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP195]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP196:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -24952,15 +28230,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -24970,51 +28250,60 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l172_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP352:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP352]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP352]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP352]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP352]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP352]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP353:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP198]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP199:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -25027,15 +28316,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -25045,51 +28336,60 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l175_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP355:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP355]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP355]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP355]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP355]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP355]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP356:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP201]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP202:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -25102,15 +28402,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -25120,51 +28422,60 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l178_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP358:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP358]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP358]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP358]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP358]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP358]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP359:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP204]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP205:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -25177,20 +28488,23 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[TOBOOL]] to i32
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK-32-EX-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP2]] to i1
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = zext i1 [[LOADEDV]] to i32
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 [[TMP3]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -25200,82 +28514,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l181_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP361:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP361]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207]]
// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP361]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP361]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP361]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP361]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP362:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP207]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP208:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[TMP1]])
@@ -25285,15 +28608,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -25303,65 +28628,74 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l185_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP364:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP364]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP364]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP364]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP364]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP364]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP365:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP210]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP211:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -25371,15 +28705,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -25389,82 +28725,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l189_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP367:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP367]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213]]
// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP367]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP367]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP367]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP367]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP368:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP213]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP214:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -25474,15 +28819,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -25492,62 +28839,71 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l193_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP370:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP370]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP370]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP370]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP370]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP370]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP371:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP216]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP217:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -25557,15 +28913,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -25575,62 +28933,71 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l197_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP373:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP373]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP373]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP373]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP373]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP373]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP374:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP219]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP220:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -25640,15 +29007,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -25658,62 +29027,71 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l201_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP376:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP376]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP376]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP376]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP376]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP376]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP377:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP222]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP223:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -25723,15 +29101,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -25741,62 +29121,71 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l205_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP379:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP379]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP379]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP379]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP379]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP379]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP380:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP225]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP226:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -25806,15 +29195,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -25824,63 +29215,72 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l209_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 65, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP382:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP382]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP382]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP382]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP382]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP382]]
-// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_fini_4(ptr @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP382]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP383:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP228]]
+// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_fini_4(ptr @[[GLOB1]], i32 [[TMP1]]), !llvm.access.group [[ACC_GRP228]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP229:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -25890,15 +29290,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -25908,65 +29310,74 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l214_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP385:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP385]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231]]
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP385]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP385]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP385]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP385]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP386:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP231]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP232:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK-32-EX: omp.loop.exit:
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP9]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP10]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -25976,15 +29387,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -25994,82 +29407,91 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l219_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP388:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP388]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234]]
// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP388]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP388]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP388]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP388]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP389:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP234]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP235:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP16]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -26079,15 +29501,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -26097,62 +29521,71 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l224_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP391:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP391]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP391]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP391]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP391]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP391]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP392:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP237]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP238:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -26162,15 +29595,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -26180,62 +29615,71 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l229_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP394:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP394]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP394]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP394]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP394]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP394]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP395:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP240]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP241:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -26245,15 +29689,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -26263,62 +29709,71 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l234_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP397:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP397]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP397]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP397]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP397]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP397]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP398:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP243]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP244:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -26328,15 +29783,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR10]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -26346,62 +29803,71 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l239_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP400:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP400]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP400]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP400]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP400]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP400]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP401:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP246]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP247:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_deinit(ptr @[[GLOB1]], i32 [[TMP1]])
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP9]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[TMP1]])
@@ -26411,15 +29877,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -26429,74 +29897,83 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l244_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
@@ -26506,15 +29983,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -26524,55 +30003,64 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l248_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -26584,15 +30072,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -26602,74 +30092,83 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l252_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-32-EX-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP1]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP14]]
-// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP1]])
@@ -26679,15 +30178,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -26697,51 +30198,60 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l256_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741862, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP403:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP403]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP403]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP403]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP403]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP403]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP404:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP249]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP250:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -26754,15 +30264,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -26772,51 +30284,60 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l260_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741861, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP406:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP406]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP406]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP406]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP406]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP406]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP407:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP252]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP253:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -26829,15 +30350,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -26847,51 +30370,60 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l264_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741859, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP409:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP409]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP409]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP409]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP409]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP409]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP410:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP255]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP256:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
@@ -26904,15 +30436,17 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR8]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -26922,51 +30456,60 @@ int a;
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l268_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_dispatch_init_4(ptr @[[GLOB1]], i32 [[TMP1]], i32 1073741860, i32 0, i32 9, i32 1, i32 1)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]])
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_dispatch_next_4(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]])
// CHECK-32-EX-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP412:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP412]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP4]], [[TMP5]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP412]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP412]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP412]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP412]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP413:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP258]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP259:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
diff --git a/clang/test/OpenMP/nvptx_allocate_codegen.cpp b/clang/test/OpenMP/nvptx_allocate_codegen.cpp
index 4f38e2c50efe3..719c896678dc0 100644
--- a/clang/test/OpenMP/nvptx_allocate_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_allocate_codegen.cpp
@@ -86,8 +86,9 @@ void bar() {
// CHECK1-LABEL: define {{[^@]+}}@main
// CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK1-NEXT: store i32 0, ptr [[RETVAL_ASCAST]], align 4
// CHECK1-NEXT: store i32 2, ptr @_ZZ4mainE1a, align 4
// CHECK1-NEXT: store double 3.000000e+00, ptr @b1, align 8
// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooIiET_v() #[[ATTR7:[0-9]+]]
@@ -97,6 +98,8 @@ void bar() {
// CHECK1-LABEL: define {{[^@]+}}@_Z3fooIiET_v
// CHECK1-SAME: () #[[ATTR1:[0-9]+]] comdat {
// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZN2STIiE1mE, align 4
// CHECK1-NEXT: store i32 [[TMP0]], ptr @v, align 4
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr @v, align 4
@@ -106,40 +109,50 @@ void bar() {
// CHECK1-LABEL: define {{[^@]+}}@_Z3barv
// CHECK1-SAME: () #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[BAR_A:%.*]] = alloca float, align 4
-// CHECK1-NEXT: [[BAR_B:%.*]] = alloca double, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+// CHECK1-NEXT: [[BAR_A:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK1-NEXT: [[BAR_B:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3barv_omp_outlined, ptr @_Z3barv_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK1-NEXT: [[BAR_A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_A]] to ptr
+// CHECK1-NEXT: [[BAR_B_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_B]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3barv_omp_outlined, ptr @_Z3barv_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z3barv_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BAR_A:%.*]] = alloca float, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[BAR_A]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BAR_A:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[BAR_A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_A]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[BAR_A_ASCAST]], align 4
// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double
// CHECK1-NEXT: store double [[CONV]], ptr addrspacecast (ptr addrspace(3) @bar_b to ptr), align 8
-// CHECK1-NEXT: call void @_Z3bazRf(ptr noundef nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR7]]
+// CHECK1-NEXT: call void @_Z3bazRf(ptr noundef nonnull align 4 dereferenceable(4) [[BAR_A_ASCAST]]) #[[ATTR7]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z3barv_omp_outlined_wrapper
// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK1-NEXT: call void @_Z3barv_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]]
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK1-NEXT: call void @_Z3barv_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR5:[0-9]+]]
// CHECK1-NEXT: ret void
//
diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp
index 5c2f6e5de0a5f..ce6d3e5e52550 100644
--- a/clang/test/OpenMP/nvptx_data_sharing.cpp
+++ b/clang/test/OpenMP/nvptx_data_sharing.cpp
@@ -33,11 +33,15 @@ void test_ds(){
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14
// CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-// CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [2 x ptr], align 8
-// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK-NEXT: [[C:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-NEXT: [[C_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C]] to ptr
+// CHECK-NEXT: [[CAPTURED_VARS_ADDRS1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_kernel_environment, ptr [[DYN_PTR]])
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
@@ -46,16 +50,16 @@ void test_ds(){
// CHECK-NEXT: [[B:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK-NEXT: store i32 10, ptr [[A]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-NEXT: store ptr [[A]], ptr [[TMP2]], align 8
-// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
// CHECK-NEXT: store i32 100, ptr [[B]], align 4
-// CHECK-NEXT: store i32 1000, ptr [[C]], align 4
-// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 0
+// CHECK-NEXT: store i32 1000, ptr [[C_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1_ASCAST]], i64 0, i64 0
// CHECK-NEXT: store ptr [[B]], ptr [[TMP3]], align 8
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1]], i64 0, i64 1
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS1_ASCAST]], i64 0, i64 1
// CHECK-NEXT: store ptr [[A]], ptr [[TMP4]], align 8
-// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 2)
+// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1_ASCAST]], i64 2)
// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[B]], i64 4)
// CHECK-NEXT: call void @__kmpc_free_shared(ptr [[A]], i64 4)
// CHECK-NEXT: call void @__kmpc_target_deinit()
@@ -67,13 +71,16 @@ void test_ds(){
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
// CHECK-NEXT: store i32 1000, ptr [[TMP0]], align 4
// CHECK-NEXT: ret void
//
@@ -81,37 +88,47 @@ void test_ds(){
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined_wrapper
// CHECK-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
-// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]]
+// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]]
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[C1:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK-NEXT: store ptr [[C]], ptr [[C1]], align 8
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[C:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[C1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[C_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C]] to ptr
+// CHECK-NEXT: [[C1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C1]] to ptr
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK-NEXT: store ptr [[C_ASCAST]], ptr [[C1_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 10000
// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
@@ -121,19 +138,23 @@ void test_ds(){
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1_wrapper
// CHECK-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
-// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR4]]
+// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_omp_outlined1(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR4]]
// CHECK-NEXT: ret void
//
diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
index 986aeadaf998c..9bcf3e694ca9b 100644
--- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp
@@ -27,36 +27,45 @@ int main(int argc, char **argv) {
// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19
// CHECK4-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK4-NEXT: entry:
-// CHECK4-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
-// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8
-// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK4-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8
+// CHECK4-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK4-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK4-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK4-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK4-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK4-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK4-NEXT: [[ARGC_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_CASTED]] to ptr
+// CHECK4-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK4-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK4-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
+// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_kernel_environment, ptr [[DYN_PTR]])
// CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK4-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4
-// CHECK4-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
-// CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP7]], ptr [[TMP3]]) #[[ATTR4:[0-9]+]]
+// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP7:%.*]] = load i64, ptr [[ARGC_CASTED_ASCAST]], align 8
+// CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP7]], ptr [[TMP3]]) #[[ATTR4:[0-9]+]]
// CHECK4-NEXT: call void @__kmpc_target_deinit()
// CHECK4-NEXT: ret void
// CHECK4: worker.exit:
@@ -66,145 +75,164 @@ int main(int argc, char **argv) {
// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined
// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK4-NEXT: entry:
-// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
-// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4
-// CHECK4-NEXT: [[I5:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8
-// CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK4-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 8
+// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4, addrspace(5)
+// CHECK4-NEXT: [[I5:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 8, addrspace(5)
+// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK4-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK4-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK4-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK4-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK4-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK4-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK4-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK4-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK4-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK4-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK4-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK4-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK4-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK4-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK4-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK4-NEXT: [[B4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B4]] to ptr
+// CHECK4-NEXT: [[I5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I5]] to ptr
+// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
// CHECK4-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 40)
-// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK4-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK4-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK4-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK4-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK4-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK4: omp.precond.then:
-// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP0]], i64 40, i1 false)
+// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4_ASCAST]], ptr align 4 [[TMP0]], i64 40, i1 false)
// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK4-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
// CHECK4-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK4: cond.true:
-// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK4-NEXT: br label [[COND_END:%.*]]
// CHECK4: cond.false:
-// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK4-NEXT: br label [[COND_END]]
// CHECK4: cond.end:
// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
-// CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK4-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK4-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1
// CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]]
// CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK4-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
-// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK4-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
-// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK4-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP18]] to ptr
// CHECK4-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8
-// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK4-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP20]] to ptr
// CHECK4-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8
-// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
-// CHECK4-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP25]], align 8
-// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// CHECK4-NEXT: store ptr [[ARGC_ADDR_ASCAST]], ptr [[TMP25]], align 8
+// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK4-NEXT: store ptr [[TMP2]], ptr [[TMP26]], align 8
-// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
-// CHECK4-NEXT: store ptr [[B4]], ptr [[TMP27]], align 8
-// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 5
+// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
+// CHECK4-NEXT: store ptr [[B4_ASCAST]], ptr [[TMP27]], align 8
+// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 5
// CHECK4-NEXT: store ptr [[C1]], ptr [[TMP28]], align 8
-// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 6
+// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 6
// CHECK4-NEXT: store ptr [[TMP3]], ptr [[TMP29]], align 8
// CHECK4-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP2]], align 4
// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP30]], 0
// CHECK4-NEXT: [[TMP31:%.*]] = zext i1 [[TOBOOL]] to i32
-// CHECK4-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: [[TMP32:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK4-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4
-// CHECK4-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP33]], i32 [[TMP31]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 7)
+// CHECK4-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP33]], i32 [[TMP31]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 7)
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK4-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP34]], [[TMP35]]
-// CHECK4-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK4-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK4-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP36]], [[TMP37]]
-// CHECK4-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK4-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK4-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK4-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP38]], [[TMP39]]
-// CHECK4-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK4-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK4-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK4-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK4-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]]
// CHECK4-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
// CHECK4: cond.true12:
-// CHECK4-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK4-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK4-NEXT: br label [[COND_END14:%.*]]
// CHECK4: cond.false13:
-// CHECK4-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK4-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK4-NEXT: br label [[COND_END14]]
// CHECK4: cond.end14:
// CHECK4-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE12]] ], [ [[TMP43]], [[COND_FALSE13]] ]
-// CHECK4-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK4-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK4-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[TMP44]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
-// CHECK4-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: [[TMP45:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK4-NEXT: [[TMP46:%.*]] = load i32, ptr [[TMP45]], align 4
// CHECK4-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP46]])
-// CHECK4-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK4-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK4-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
// CHECK4-NEXT: br i1 [[TMP48]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK4: .omp.lastprivate.then:
@@ -220,96 +248,117 @@ int main(int argc, char **argv) {
// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined_omp_outlined
// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1]] {
// CHECK4-NEXT: entry:
-// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
-// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4
-// CHECK4-NEXT: [[C5:%.*]] = alloca [10 x i32], align 4
-// CHECK4-NEXT: [[I6:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK4-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK4-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
-// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 8
+// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4, addrspace(5)
+// CHECK4-NEXT: [[C5:%.*]] = alloca [10 x i32], align 4, addrspace(5)
+// CHECK4-NEXT: [[I6:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK4-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK4-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK4-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK4-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK4-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK4-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK4-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK4-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK4-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK4-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK4-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK4-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK4-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK4-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK4-NEXT: [[B4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B4]] to ptr
+// CHECK4-NEXT: [[C5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C5]] to ptr
+// CHECK4-NEXT: [[I6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I6]] to ptr
+// CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8
+// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK4-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK4-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK4-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK4-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]]
// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK4: omp.precond.then:
-// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4
-// CHECK4-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK4-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32
-// CHECK4-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK4-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP10]] to i32
-// CHECK4-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK4-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP2]], i64 40, i1 false)
-// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK4-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK4-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4_ASCAST]], ptr align 4 [[TMP2]], i64 40, i1 false)
+// CHECK4-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK4-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK4: omp.inner.for.cond:
-// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK4-NEXT: [[CONV7:%.*]] = sext i32 [[TMP14]] to i64
-// CHECK4-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK4-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK4-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP15]]
// CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK4: omp.inner.for.body:
-// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK4-NEXT: store i32 [[ADD]], ptr [[I6]], align 4
-// CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I6]]) #[[ATTR8:[0-9]+]]
+// CHECK4-NEXT: store i32 [[ADD]], ptr [[I6_ASCAST]], align 4
+// CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I6_ASCAST]]) #[[ATTR8:[0-9]+]]
// CHECK4-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP1]]) #[[ATTR8]]
// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]]
-// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[I6]], align 4
+// CHECK4-NEXT: [[TMP17:%.*]] = load i32, ptr [[I6_ASCAST]], align 4
// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
-// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B4]], i64 0, i64 [[IDXPROM]]
+// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B4_ASCAST]], i64 0, i64 [[IDXPROM]]
// CHECK4-NEXT: [[CALL11:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]]
// CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]]
-// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[I6]], align 4
+// CHECK4-NEXT: [[TMP18:%.*]] = load i32, ptr [[I6_ASCAST]], align 4
// CHECK4-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP18]] to i64
-// CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[C5]], i64 0, i64 [[IDXPROM13]]
+// CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[C5_ASCAST]], i64 0, i64 [[IDXPROM13]]
// CHECK4-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX14]]) #[[ATTR8]]
// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]]
-// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[I6]], align 4
+// CHECK4-NEXT: [[TMP19:%.*]] = load i32, ptr [[I6_ASCAST]], align 4
// CHECK4-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP19]] to i64
// CHECK4-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i64 0, i64 [[IDXPROM17]]
// CHECK4-NEXT: [[CALL19:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX18]]) #[[ATTR8]]
@@ -319,22 +368,22 @@ int main(int argc, char **argv) {
// CHECK4: omp.body.continue:
// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK4: omp.inner.for.inc:
-// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK4-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK4-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV]], align 4
+// CHECK4-NEXT: store i32 [[ADD21]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK4: omp.inner.for.end:
// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK4: omp.loop.exit:
-// CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK4-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK4-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP23]])
-// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK4-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK4-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
// CHECK4-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK4: .omp.lastprivate.then:
-// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[C5]], i64 40, i1 false)
+// CHECK4-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP3]], ptr align 4 [[C5_ASCAST]], i64 40, i1 false)
// CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK4: .omp.lastprivate.done:
// CHECK4-NEXT: br label [[OMP_PRECOND_END]]
@@ -345,36 +394,45 @@ int main(int argc, char **argv) {
// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19
// CHECK5-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK5-NEXT: entry:
-// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK5-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
-// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK5-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK5-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK5-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK5-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK5-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK5-NEXT: [[ARGC_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_CASTED]] to ptr
+// CHECK5-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK5-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
+// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
+// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
+// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_kernel_environment, ptr [[DYN_PTR]])
// CHECK5-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK5-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK5: user_code.entry:
// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK5-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED]], align 4
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
-// CHECK5-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP7]], ptr [[TMP3]]) #[[ATTR4:[0-9]+]]
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store i32 [[TMP6]], ptr [[ARGC_CASTED_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARGC_CASTED_ASCAST]], align 4
+// CHECK5-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]], i32 [[TMP7]], ptr [[TMP3]]) #[[ATTR4:[0-9]+]]
// CHECK5-NEXT: call void @__kmpc_target_deinit()
// CHECK5-NEXT: ret void
// CHECK5: worker.exit:
@@ -384,143 +442,162 @@ int main(int argc, char **argv) {
// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined
// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK5-NEXT: entry:
-// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4
-// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 4
-// CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK5-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
-// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4, addrspace(5)
+// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x ptr], align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK5-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK5-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK5-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK5-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK5-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK5-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK5-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK5-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK5-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK5-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK5-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK5-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK5-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK5-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK5-NEXT: [[B4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B4]] to ptr
+// CHECK5-NEXT: [[I5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I5]] to ptr
+// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
+// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
+// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
+// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
// CHECK5-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 40)
-// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0
// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK5-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK5-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK5-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK5-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK5-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK5: omp.precond.then:
-// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B4]], ptr align 4 [[TMP0]], i32 40, i1 false)
+// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK5-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B4_ASCAST]], ptr align 4 [[TMP0]], i32 40, i1 false)
// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK5-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK5-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK5: cond.true:
-// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK5-NEXT: br label [[COND_END:%.*]]
// CHECK5: cond.false:
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK5-NEXT: br label [[COND_END]]
// CHECK5: cond.end:
// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
-// CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK5-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1
// CHECK5-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP15]], [[ADD]]
// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK5-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP17]] to ptr
// CHECK5-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 4
-// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK5-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP18]] to ptr
// CHECK5-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 4
-// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
-// CHECK5-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP23]], align 4
-// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
+// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
+// CHECK5-NEXT: store ptr [[ARGC_ADDR_ASCAST]], ptr [[TMP23]], align 4
+// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 3
// CHECK5-NEXT: store ptr [[TMP2]], ptr [[TMP24]], align 4
-// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4
-// CHECK5-NEXT: store ptr [[B4]], ptr [[TMP25]], align 4
-// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 5
+// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 4
+// CHECK5-NEXT: store ptr [[B4_ASCAST]], ptr [[TMP25]], align 4
+// CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 5
// CHECK5-NEXT: store ptr [[C1]], ptr [[TMP26]], align 4
-// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 6
+// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 6
// CHECK5-NEXT: store ptr [[TMP3]], ptr [[TMP27]], align 4
// CHECK5-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP2]], align 4
// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0
// CHECK5-NEXT: [[TMP29:%.*]] = zext i1 [[TOBOOL]] to i32
-// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK5-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK5-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
-// CHECK5-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 [[TMP29]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 7)
+// CHECK5-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 [[TMP29]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 7)
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK5-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP32]], [[TMP33]]
-// CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], [[TMP35]]
-// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK5-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP36]], [[TMP37]]
-// CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK5-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK5-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]]
// CHECK5-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]]
// CHECK5: cond.true12:
-// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK5-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK5-NEXT: br label [[COND_END14:%.*]]
// CHECK5: cond.false13:
-// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK5-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK5-NEXT: br label [[COND_END14]]
// CHECK5: cond.end14:
// CHECK5-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE12]] ], [ [[TMP41]], [[COND_FALSE13]] ]
-// CHECK5-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK5-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK5-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: store i32 [[COND15]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK5-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK5: omp.loop.exit:
-// CHECK5-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK5-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK5-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
// CHECK5-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]])
-// CHECK5-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK5-NEXT: [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK5-NEXT: [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0
// CHECK5-NEXT: br i1 [[TMP46]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK5: .omp.lastprivate.then:
@@ -536,91 +613,112 @@ int main(int argc, char **argv) {
// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19_omp_outlined_omp_outlined
// CHECK5-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR1]] {
// CHECK5-NEXT: entry:
-// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
-// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4
-// CHECK5-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4
-// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4
-// CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK5-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
-// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4
-// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4, addrspace(5)
+// CHECK5-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4, addrspace(5)
+// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK5-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK5-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK5-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK5-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK5-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK5-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK5-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK5-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK5-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK5-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK5-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK5-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK5-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK5-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK5-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK5-NEXT: [[B3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B3]] to ptr
+// CHECK5-NEXT: [[C4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C4]] to ptr
+// CHECK5-NEXT: [[I5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I5]] to ptr
+// CHECK5-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
+// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
+// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
+// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
+// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0
// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK5-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK5-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK5-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK5-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]]
// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK5: omp.precond.then:
-// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB]], align 4
-// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB]], align 4
-// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4
-// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B3]], ptr align 4 [[TMP2]], i32 40, i1 false)
-// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK5-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK5-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK5-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK5-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B3_ASCAST]], ptr align 4 [[TMP2]], i32 40, i1 false)
+// CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK5-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK5: omp.inner.for.cond:
-// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP14]], [[TMP15]]
// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK5: omp.inner.for.body:
-// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK5-NEXT: store i32 [[ADD]], ptr [[I5]], align 4
-// CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I5]]) #[[ATTR8:[0-9]+]]
+// CHECK5-NEXT: store i32 [[ADD]], ptr [[I5_ASCAST]], align 4
+// CHECK5-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I5_ASCAST]]) #[[ATTR8:[0-9]+]]
// CHECK5-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP1]]) #[[ATTR8]]
// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]]
-// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I5]], align 4
-// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B3]], i32 0, i32 [[TMP17]]
+// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[I5_ASCAST]], align 4
+// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B3_ASCAST]], i32 0, i32 [[TMP17]]
// CHECK5-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX]]) #[[ATTR8]]
// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]]
-// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5]], align 4
-// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[C4]], i32 0, i32 [[TMP18]]
+// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[I5_ASCAST]], align 4
+// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[C4_ASCAST]], i32 0, i32 [[TMP18]]
// CHECK5-NEXT: [[CALL12:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX11]]) #[[ATTR8]]
// CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]]
-// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5]], align 4
+// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[I5_ASCAST]], align 4
// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP4]], i32 0, i32 [[TMP19]]
// CHECK5-NEXT: [[CALL15:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARRAYIDX14]]) #[[ATTR8]]
// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]]
@@ -629,22 +727,22 @@ int main(int argc, char **argv) {
// CHECK5: omp.body.continue:
// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK5: omp.inner.for.inc:
-// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK5-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK5-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4
+// CHECK5-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK5: omp.inner.for.end:
// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK5: omp.loop.exit:
-// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK5-NEXT: [[TMP22:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4
// CHECK5-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP23]])
-// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK5-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
// CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK5: .omp.lastprivate.then:
-// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[C4]], i32 40, i1 false)
+// CHECK5-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP3]], ptr align 4 [[C4_ASCAST]], i32 40, i1 false)
// CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK5: .omp.lastprivate.done:
// CHECK5-NEXT: br label [[OMP_PRECOND_END]]
diff --git a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp
index 8868250631ec9..edf9cf3996707 100644
--- a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp
@@ -28,15 +28,17 @@ int main() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -46,10 +48,12 @@ int main() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: call void @_Z3usev() #[[ATTR8:[0-9]+]]
// CHECK1-NEXT: ret void
//
@@ -57,17 +61,19 @@ int main() {
// CHECK1-LABEL: define {{[^@]+}}@_Z3usev
// CHECK1-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3usev_omp_outlined, ptr @_Z3usev_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3usev_omp_outlined, ptr @_Z3usev_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR5:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
@@ -82,10 +88,12 @@ int main() {
// CHECK1-LABEL: define {{[^@]+}}@_Z3usev_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: call void @_Z4workv() #[[ATTR8]]
// CHECK1-NEXT: ret void
//
@@ -93,30 +101,36 @@ int main() {
// CHECK1-LABEL: define {{[^@]+}}@_Z3usev_omp_outlined_wrapper
// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK1-NEXT: call void @_Z3usev_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]]
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK1-NEXT: call void @_Z3usev_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -126,10 +140,12 @@ int main() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l21_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: call void @_Z3usev() #[[ATTR8:[0-9]+]]
// CHECK2-NEXT: ret void
//
@@ -137,17 +153,19 @@ int main() {
// CHECK2-LABEL: define {{[^@]+}}@_Z3usev
// CHECK2-SAME: () #[[ATTR2:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3usev_omp_outlined, ptr @_Z3usev_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3usev_omp_outlined, ptr @_Z3usev_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR5:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
@@ -162,10 +180,12 @@ int main() {
// CHECK2-LABEL: define {{[^@]+}}@_Z3usev_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: call void @_Z4workv() #[[ATTR8]]
// CHECK2-NEXT: ret void
//
@@ -173,14 +193,18 @@ int main() {
// CHECK2-LABEL: define {{[^@]+}}@_Z3usev_omp_outlined_wrapper
// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK2-NEXT: call void @_Z3usev_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]]
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK2-NEXT: call void @_Z3usev_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: ret void
//
diff --git a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp
index 9c985c1fde608..c293569b664b6 100644
--- a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp
@@ -36,21 +36,24 @@ int main() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]]
-// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -60,26 +63,31 @@ int main() {
// CHECK1-LABEL: define {{[^@]+}}@_Z3usePi
// CHECK1-SAME: (ptr noundef [[C:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[C_ADDR]], ptr [[TMP1]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @_Z3usePi_omp_outlined, ptr @_Z3usePi_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[C_ADDR_ASCAST]], ptr [[TMP1]], align 8
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @_Z3usePi_omp_outlined, ptr @_Z3usePi_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6]]
// CHECK1-NEXT: ret void
//
@@ -87,31 +95,38 @@ int main() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined_wrapper
// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z3usePi_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[C:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META7:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8
// CHECK1-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR6]]
// CHECK1-NEXT: ret void
@@ -120,9 +135,10 @@ int main() {
// CHECK1-LABEL: define {{[^@]+}}@_Z4workPi
// CHECK1-SAME: (ptr noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4
// CHECK1-NEXT: ret void
//
@@ -130,39 +146,46 @@ int main() {
// CHECK1-LABEL: define {{[^@]+}}@_Z3usePi_omp_outlined_wrapper
// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @_Z3usePi_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @_Z3usePi_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]]
-// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -172,26 +195,31 @@ int main() {
// CHECK2-LABEL: define {{[^@]+}}@_Z3usePi
// CHECK2-SAME: (ptr noundef [[C:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[C_ADDR]], ptr [[TMP1]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @_Z3usePi_omp_outlined, ptr @_Z3usePi_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[C_ADDR_ASCAST]], ptr [[TMP1]], align 4
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 2, i32 -1, ptr @_Z3usePi_omp_outlined, ptr @_Z3usePi_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 1)
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6]]
// CHECK2-NEXT: ret void
//
@@ -199,31 +227,38 @@ int main() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined_wrapper
// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]]) #[[ATTR4:[0-9]+]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z3usePi_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[C:%.*]]) #[[ATTR2]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
// CHECK2-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR6]]
// CHECK2-NEXT: ret void
@@ -232,9 +267,10 @@ int main() {
// CHECK2-LABEL: define {{[^@]+}}@_Z4workPi
// CHECK2-SAME: (ptr noundef [[C:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4
// CHECK2-NEXT: ret void
//
@@ -242,17 +278,21 @@ int main() {
// CHECK2-LABEL: define {{[^@]+}}@_Z3usePi_omp_outlined_wrapper
// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @_Z3usePi_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR4]]
+// CHECK2-NEXT: call void @_Z3usePi_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
index 0fea287bce94e..5a454a83d084b 100644
--- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp
@@ -76,24 +76,29 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS2]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i64 0)
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1_ASCAST]], i64 0)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper, ptr [[CAPTURED_VARS_ADDRS2_ASCAST]], i64 0)
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -103,116 +108,143 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i32 42, ptr [[A]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 42, ptr [[A_ASCAST]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper
// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]]
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i32 43, ptr [[A]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 43, ptr [[A_ASCAST]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper
// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR3]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i32 44, ptr [[A]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 44, ptr [[A_ASCAST]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper
// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR3]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000
// CHECK1-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32
// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
-// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
@@ -226,13 +258,16 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i32 45, ptr [[A]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 45, ptr [[A_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
// CHECK1-NEXT: ret void
@@ -241,37 +276,44 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined_wrapper
// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR3]]
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR3]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK1-NEXT: [[A1:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[A1]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A1]], align 4
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[A1]], align 4
@@ -285,29 +327,33 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[CRITICAL_COUNTER_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CRITICAL_COUNTER]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask()
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK1-NEXT: store i32 0, ptr [[CRITICAL_COUNTER]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[CRITICAL_COUNTER_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]]
// CHECK1: omp.critical.loop:
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[CRITICAL_COUNTER_ASCAST]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], [[NVPTX_NUM_THREADS]]
// CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]]
// CHECK1: omp.critical.test:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[CRITICAL_COUNTER_ASCAST]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP2]], [[TMP5]]
// CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]]
// CHECK1: omp.critical.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP8]], ptr @"_gomp_critical_user_$var")
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4
@@ -318,7 +364,7 @@ int bar(int n){
// CHECK1: omp.critical.sync:
// CHECK1-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]])
// CHECK1-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[CRITICAL_COUNTER]], align 4
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[CRITICAL_COUNTER_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP]]
// CHECK1: omp.critical.exit:
// CHECK1-NEXT: ret void
@@ -327,42 +373,51 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined_wrapper
// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR3]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]]) #[[ATTR3]]
// CHECK1-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS2:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS1]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS2]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i32 0)
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper, ptr [[CAPTURED_VARS_ADDRS2]], i32 0)
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper, ptr [[CAPTURED_VARS_ADDRS1_ASCAST]], i32 0)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper, ptr [[CAPTURED_VARS_ADDRS2_ASCAST]], i32 0)
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -372,116 +427,143 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 42, ptr [[A]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 42, ptr [[A_ASCAST]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_wrapper
// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2:[0-9]+]]
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 43, ptr [[A]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 43, ptr [[A_ASCAST]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1_wrapper
// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined1(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 44, ptr [[A]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 44, ptr [[A_ASCAST]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2_wrapper
// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined2(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000
// CHECK2-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32
// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
-// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
+// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
@@ -495,13 +577,16 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 45, ptr [[A]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 45, ptr [[A_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]])
// CHECK2-NEXT: ret void
@@ -510,37 +595,44 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined_wrapper
// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR2]]
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR2]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[A1:%.*]] = call align 4 ptr @__kmpc_alloc_shared(i32 4)
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[A1]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 1)
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[A1]], align 4
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[A1]], align 4
@@ -554,29 +646,33 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[CRITICAL_COUNTER_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CRITICAL_COUNTER]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META8]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i64 @__kmpc_warp_active_thread_mask()
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK2-NEXT: store i32 0, ptr [[CRITICAL_COUNTER]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[CRITICAL_COUNTER_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]]
// CHECK2: omp.critical.loop:
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[CRITICAL_COUNTER_ASCAST]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], [[NVPTX_NUM_THREADS]]
// CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]]
// CHECK2: omp.critical.test:
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[CRITICAL_COUNTER]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[CRITICAL_COUNTER_ASCAST]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP2]], [[TMP5]]
// CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]]
// CHECK2: omp.critical.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
// CHECK2-NEXT: call void @__kmpc_critical(ptr @[[GLOB1]], i32 [[TMP8]], ptr @"_gomp_critical_user_$var")
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4
@@ -587,7 +683,7 @@ int bar(int n){
// CHECK2: omp.critical.sync:
// CHECK2-NEXT: call void @__kmpc_syncwarp(i64 [[TMP1]])
// CHECK2-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK2-NEXT: store i32 [[TMP10]], ptr [[CRITICAL_COUNTER]], align 4
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[CRITICAL_COUNTER_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP]]
// CHECK2: omp.critical.exit:
// CHECK2-NEXT: ret void
@@ -596,17 +692,21 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined_wrapper
// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR2]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]]) #[[ATTR2]]
// CHECK2-NEXT: ret void
//
diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
index bc9e53bdeb478..66a72744b8f1b 100644
--- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp
@@ -35,27 +35,31 @@ int bar(int n){
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13
// CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13_kernel_environment, ptr [[DYN_PTR]])
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: [[D:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
// CHECK-NEXT: store i32 [[TMP3]], ptr [[D]], align 4
-// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-NEXT: store ptr [[D]], ptr [[TMP5]], align 8
-// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 3
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
@@ -70,63 +74,74 @@ int bar(int n){
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13_omp_outlined
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8
-// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP3]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK: omp.dispatch.cond:
-// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9
// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK: cond.true:
// CHECK-NEXT: br label [[COND_END:%.*]]
// CHECK: cond.false:
-// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-NEXT: br label [[COND_END]]
// CHECK: cond.end:
// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ]
-// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV]], align 4
-// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]]
// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK: omp.dispatch.body:
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK: omp.inner.for.cond:
-// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]]
// CHECK-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK: omp.inner.for.body:
-// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1
// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+// CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4
+// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
@@ -136,21 +151,21 @@ int bar(int n){
// CHECK: omp.body.continue:
// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK: omp.inner.for.inc:
-// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
+// CHECK-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK: omp.inner.for.end:
// CHECK-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK: omp.dispatch.inc:
-// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4
-// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4
+// CHECK-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK: omp.dispatch.end:
// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB2]], i32 [[TMP3]])
@@ -160,19 +175,23 @@ int bar(int n){
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13_omp_outlined_wrapper
// CHECK-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
-// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: ret void
//
diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp
index 0045bd4854443..4f7acc105f2d2 100644
--- a/clang/test/OpenMP/nvptx_target_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_codegen.cpp
@@ -147,24 +147,28 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25
// CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[PTR1:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[PTR1_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR1_ADDR]] to ptr
+// CHECK1-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR_ASCAST]], align 8, !nonnull [[META12:![0-9]+]], !align [[META13:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[PTR1_ADDR_ASCAST]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -174,16 +178,20 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined
// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR1:%.*]], ptr nonnull align 8 dereferenceable(8) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[PTR1_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR1_ADDR]] to ptr
+// CHECK1-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR_ASCAST]], align 8, !nonnull [[META12]], !align [[META13]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR_ASCAST]], align 8, !nonnull [[META12]], !align [[META13]]
// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 8
@@ -194,8 +202,9 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39
// CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
@@ -209,24 +218,26 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47
// CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[AA:%.*]]) #[[ATTR4]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK1-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
-// CHECK1-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
-// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2
// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16
-// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -236,41 +247,51 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53
// CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i64 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i64 [[VLA1:%.*]], i64 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR4]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
-// CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8
-// CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// CHECK1-NEXT: [[BN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BN_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// CHECK1-NEXT: [[VLA_ADDR4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR4]] to ptr
+// CHECK1-NEXT: [[CN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CN_ADDR]] to ptr
+// CHECK1-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[BN]], ptr [[BN_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META12]], !align [[META14:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR_ASCAST]], align 8, !nonnull [[META12]], !align [[META14]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META12]], !align [[META13]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR_ASCAST]], align 8, !nonnull [[META12]], !align [[META13]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8, !nonnull [[META12]], !align [[META13]]
// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2
// CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP10]] to double
@@ -317,11 +338,15 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@_ZN2TTIxcEixEi
// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
-// CHECK1-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
-// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[RETVAL:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK1-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK1-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[I]], ptr [[I_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: ret ptr [[X]]
//
@@ -329,34 +354,39 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90
// CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[A:%.*]], i64 [[AA:%.*]], i64 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[AAA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AAA_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META12]], !align [[META14]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
-// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
+// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[AAA_ADDR_ASCAST]], align 1
// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
-// CHECK1-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1
+// CHECK1-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR_ASCAST]], align 1
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
@@ -370,27 +400,33 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108
// CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[THIS:%.*]], i64 [[B:%.*]], i64 [[VLA:%.*]], i64 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR4]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// CHECK1-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META12]], !align [[META15:![0-9]+]]
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
@@ -418,18 +454,23 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@_Z3baziRd
// CHECK1-SAME: (i32 [[F1:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
+// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[CLEANUP_DEST_SLOT:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
// CHECK1-NEXT: [[F:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: store i32 [[F1]], ptr [[F]], align 4
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META12]], !align [[META13]]
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[F]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3baziRd_omp_outlined, ptr @_Z3baziRd_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3baziRd_omp_outlined, ptr @_Z3baziRd_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[F]], align 4
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[F]], i64 4)
// CHECK1-NEXT: ret i32 [[TMP4]]
@@ -438,8 +479,9 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142
// CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
@@ -456,27 +498,31 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74
// CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[A:%.*]], i64 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META12]], !align [[META14]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
-// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1
@@ -490,19 +536,24 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@_Z3baziRd_omp_outlined
// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[F:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8, !nonnull [[META12]], !align [[META14]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META12]], !align [[META13]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !nonnull [[META12]], !align [[META13]]
// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8
// CHECK1-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]]
// CHECK1-NEXT: [[CONV:%.*]] = fptosi double [[ADD]] to i32
@@ -513,44 +564,52 @@ void unreachable_call() {
// CHECK1-LABEL: define {{[^@]+}}@_Z3baziRd_omp_outlined_wrapper
// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
-// CHECK1-NEXT: call void @_Z3baziRd_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK1-NEXT: call void @_Z3baziRd_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25
// CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[PTR1:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR2:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[PTR1_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR1_ADDR]] to ptr
+// CHECK2-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR2_ADDR_ASCAST]], align 4, !nonnull [[META12:![0-9]+]], !align [[META13:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[PTR1_ADDR]], ptr [[TMP3]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[PTR1_ADDR_ASCAST]], ptr [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -560,16 +619,20 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9targetBarPiS__l25_omp_outlined
// CHECK2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR1:%.*]], ptr nonnull align 4 dereferenceable(4) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[PTR1_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[PTR2_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[PTR1_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR1_ADDR]] to ptr
+// CHECK2-NEXT: [[PTR2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR2_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[PTR1]], ptr [[PTR1_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[PTR2]], ptr [[PTR2_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR1_ADDR_ASCAST]], align 4, !nonnull [[META12]], !align [[META13]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR2_ADDR_ASCAST]], align 4, !nonnull [[META12]], !align [[META13]]
// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP0]], align 4
@@ -580,8 +643,9 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39
// CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l39_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
@@ -595,24 +659,26 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47
// CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[AA:%.*]]) #[[ATTR4]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l47_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK2-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
-// CHECK2-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
-// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK2-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 2
// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16
-// CHECK2-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2
+// CHECK2-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -622,41 +688,51 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53
// CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[A:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]], i32 [[VLA:%.*]], ptr nonnull align 4 dereferenceable(4) [[BN:%.*]], ptr nonnull align 8 dereferenceable(400) [[C:%.*]], i32 [[VLA1:%.*]], i32 [[VLA3:%.*]], ptr nonnull align 8 dereferenceable(8) [[CN:%.*]], ptr nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR4]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[BN]], ptr [[BN_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
-// CHECK2-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4
-// CHECK2-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[BN_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[CN_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// CHECK2-NEXT: [[BN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BN_ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// CHECK2-NEXT: [[VLA_ADDR4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR4]] to ptr
+// CHECK2-NEXT: [[CN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CN_ADDR]] to ptr
+// CHECK2-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[BN]], ptr [[BN_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[CN]], ptr [[CN_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META12]], !align [[META13]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR_ASCAST]], align 4, !nonnull [[META12]], !align [[META13]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META12]], !align [[META14:![0-9]+]]
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR_ASCAST]], align 4, !nonnull [[META12]], !align [[META14]]
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 4, !nonnull [[META12]], !align [[META14]]
// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l53_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2
// CHECK2-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = fpext float [[TMP10]] to double
@@ -703,11 +779,15 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@_ZN2TTIxcEixEi
// CHECK2-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 [[I:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[I]], ptr [[I_ADDR]], align 4
-// CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CHECK2-NEXT: [[RETVAL:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK2-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK2-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[I]], ptr [[I_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[THIS1]], i32 0, i32 0
// CHECK2-NEXT: ret ptr [[X]]
//
@@ -715,34 +795,39 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90
// CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[A:%.*]], i32 [[AA:%.*]], i32 [[AAA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[AAA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AAA_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META12]], !align [[META13]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l90_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
-// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8, ptr [[AAA_ADDR]], align 1
+// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP4:%.*]] = load i8, ptr [[AAA_ADDR_ASCAST]], align 1
// CHECK2-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
// CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
-// CHECK2-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR]], align 1
+// CHECK2-NEXT: store i8 [[CONV5]], ptr [[AAA_ADDR_ASCAST]], align 1
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
@@ -756,27 +841,33 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108
// CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr [[THIS:%.*]], i32 [[B:%.*]], i32 [[VLA:%.*]], i32 [[VLA1:%.*]], ptr nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR4]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2]], align 4
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[VLA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR]] to ptr
+// CHECK2-NEXT: [[VLA_ADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VLA_ADDR2]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[VLA]], ptr [[VLA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[VLA1]], ptr [[VLA_ADDR2_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META12]], !align [[META15:![0-9]+]]
// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l108_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
// CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0
@@ -804,18 +895,23 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@_Z3baziRd
// CHECK2-SAME: (i32 [[F1:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
+// CHECK2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[CLEANUP_DEST_SLOT:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
// CHECK2-NEXT: [[F:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK2-NEXT: store i32 [[F1]], ptr [[F]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META12]], !align [[META14]]
+// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[F]], ptr [[TMP2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3baziRd_omp_outlined, ptr @_Z3baziRd_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3baziRd_omp_outlined, ptr @_Z3baziRd_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[F]], align 4
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[F]], i32 4)
// CHECK2-NEXT: ret i32 [[TMP4]]
@@ -824,8 +920,9 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142
// CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR4]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
@@ -842,27 +939,31 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74
// CHECK2-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[A:%.*]], i32 [[AA:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR4]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META12]], !align [[META13]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
-// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
+// CHECK2-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP4]], 1
@@ -876,19 +977,24 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@_Z3baziRd_omp_outlined
// CHECK2-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[F:%.*]], ptr nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 4, !nonnull [[META12]], !align [[META13]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META12]], !align [[META14]]
+// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 4, !nonnull [[META12]], !align [[META14]]
// CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8
// CHECK2-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP3]]
// CHECK2-NEXT: [[CONV:%.*]] = fptosi double [[ADD]] to i32
@@ -899,19 +1005,23 @@ void unreachable_call() {
// CHECK2-LABEL: define {{[^@]+}}@_Z3baziRd_omp_outlined_wrapper
// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 1
// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4
-// CHECK2-NEXT: call void @_Z3baziRd_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK2-NEXT: call void @_Z3baziRd_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]], ptr [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: ret void
//
diff --git a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
index 35f8089662bd5..9f6189b2babf6 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp
@@ -54,20 +54,23 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -77,13 +80,16 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META7]]
// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
@@ -95,30 +101,35 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META8:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META7]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META8]]
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -128,19 +139,24 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META8]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META7]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META8]]
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
@@ -159,20 +175,23 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -182,13 +201,16 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META7]]
// CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
@@ -200,30 +222,35 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META8:![0-9]+]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META7]]
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META8]]
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP5]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP6]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP7]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 3)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -233,19 +260,24 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l35_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META8]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META7]]
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META8]]
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
index f92ce4e89464b..2d3b55d837309 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp
@@ -49,20 +49,23 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -72,13 +75,16 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META7]]
// CHECK1-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
@@ -90,33 +96,39 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META8:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META7]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META8]]
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -126,19 +138,24 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META8]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META7]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META8]]
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK1-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
@@ -157,20 +174,23 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 1024, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -180,13 +200,16 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l25_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META7]]
// CHECK2-NEXT: [[TMP1:%.*]] = load i16, ptr [[TMP0]], align 2
// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
@@ -198,33 +221,39 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META8:![0-9]+]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META7]]
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META8]]
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP3]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP6]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[TMP7]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK2-NEXT: store ptr [[TMP2]], ptr [[TMP8]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP4]], i32 1, i32 [[TMP5]], i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 3)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -234,19 +263,24 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l30_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META8]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META7]]
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META8]]
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
// CHECK2-NEXT: store i32 [[ADD]], ptr [[TMP0]], align 4
diff --git a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
index bfcdf9b5b1839..7194add3c3b7c 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp
@@ -55,15 +55,17 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
// CHECK45-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
@@ -73,34 +75,40 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined
// CHECK45-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
// CHECK45-64-NEXT: ret void
//
//
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
// CHECK45-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-64-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-64-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
// CHECK45-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK45-64-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
-// CHECK45-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8
-// CHECK45-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK45-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK45-64-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK45-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK45-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
// CHECK45-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8
-// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
@@ -110,55 +118,65 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
// CHECK45-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-64-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK45-64-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
-// CHECK45-64-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
+// CHECK45-64-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK45-64-NEXT: ret void
//
//
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
// CHECK45-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
-// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-64-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK45-64-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-64-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-64-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK45-64-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK45-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
-// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8
-// CHECK45-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK45-64-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
-// CHECK45-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8
-// CHECK45-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[A_CASTED_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK45-64-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK45-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK45-64-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP4]] to ptr
// CHECK45-64-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8
-// CHECK45-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK45-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK45-64-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP6]] to ptr
// CHECK45-64-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8
-// CHECK45-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK45-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK45-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8
-// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK45-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
@@ -168,25 +186,30 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
// CHECK45-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-64-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK45-64-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-64-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
+// CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
-// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK45-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
// CHECK45-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK45-64-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
-// CHECK45-64-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
+// CHECK45-64-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2
// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK45-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
@@ -197,15 +220,17 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
// CHECK45-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
@@ -215,34 +240,40 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined
// CHECK45-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
// CHECK45-32-NEXT: ret void
//
//
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
// CHECK45-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
-// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-32-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
// CHECK45-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK45-32-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
-// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
-// CHECK45-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK45-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK45-32-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK45-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
// CHECK45-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
-// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 1)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
@@ -252,55 +283,65 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
// CHECK45-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-32-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK45-32-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
-// CHECK45-32-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
+// CHECK45-32-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK45-32-NEXT: ret void
//
//
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
// CHECK45-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
-// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK45-32-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-32-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-32-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK45-32-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK45-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
-// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4
-// CHECK45-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK45-32-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
-// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4
-// CHECK45-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[A_CASTED_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK45-32-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK45-32-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr
// CHECK45-32-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
-// CHECK45-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK45-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK45-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK45-32-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
-// CHECK45-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK45-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK45-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
-// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK45-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 3)
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
@@ -310,25 +351,30 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
// CHECK45-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-32-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK45-32-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-32-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META8]]
+// CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
-// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
// CHECK45-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK45-32-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
-// CHECK45-32-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
+// CHECK45-32-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK45-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
@@ -339,15 +385,17 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
// CHECK45-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
@@ -357,34 +405,40 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined
// CHECK45-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
// CHECK45-32-EX-NEXT: ret void
//
//
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
// CHECK45-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
-// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK45-32-EX-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
-// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
-// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK45-32-EX-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
// CHECK45-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
-// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 1)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
@@ -394,55 +448,65 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
// CHECK45-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK45-32-EX-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
-// CHECK45-32-EX-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
+// CHECK45-32-EX-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK45-32-EX-NEXT: ret void
//
//
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
// CHECK45-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
-// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK45-32-EX-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK45-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
-// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4
-// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK45-32-EX-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
-// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4
-// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[A_CASTED_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK45-32-EX-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr
// CHECK45-32-EX-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
-// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK45-32-EX-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
-// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK45-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
-// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK45-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 3)
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
@@ -452,25 +516,30 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
// CHECK45-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META8]]
+// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
-// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
// CHECK45-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK45-32-EX-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
-// CHECK45-32-EX-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
+// CHECK45-32-EX-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK45-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
@@ -481,15 +550,17 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -499,34 +570,40 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: ret void
//
//
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-64-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK-64-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8
-// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK-64-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to ptr
// CHECK-64-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -536,55 +613,65 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK-64-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
-// CHECK-64-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
+// CHECK-64-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK-64-NEXT: ret void
//
//
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-64-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-64-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-64-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK-64-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK-64-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8
-// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[A_CASTED_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK-64-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP4]] to ptr
// CHECK-64-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 8
-// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP6]] to ptr
// CHECK-64-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 8
-// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -594,25 +681,30 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]], i64 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-64-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-64-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK-64-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
-// CHECK-64-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
+// CHECK-64-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2
// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
@@ -623,15 +715,17 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -641,34 +735,40 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: ret void
//
//
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-32-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK-32-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK-32-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
// CHECK-32-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 1)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -678,55 +778,65 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK-32-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
-// CHECK-32-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
+// CHECK-32-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK-32-NEXT: ret void
//
//
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-32-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK-32-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK-32-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[A_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK-32-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr
// CHECK-32-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
-// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
-// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 3)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -736,25 +846,30 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-32-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META8]]
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK-32-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
-// CHECK-32-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
+// CHECK-32-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
@@ -765,15 +880,17 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 0)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -783,34 +900,40 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l27_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: ret void
//
//
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK-32-EX-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK-32-EX-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = inttoptr i32 [[TMP3]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP5]], ptr [[TMP4]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 1)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -820,55 +943,65 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l31_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
// CHECK-32-EX-NEXT: [[CONV1:%.*]] = trunc i32 [[ADD]] to i16
-// CHECK-32-EX-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR]], align 2
+// CHECK-32-EX-NEXT: store i16 [[CONV1]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK-32-EX-NEXT: ret void
//
//
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK-32-EX-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK-32-EX-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[A_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK-32-EX-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP8:%.*]] = inttoptr i32 [[TMP4]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP8]], ptr [[TMP7]], align 4
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP6]] to ptr
// CHECK-32-EX-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP11]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 3)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -878,25 +1011,30 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l36_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]], i32 noundef [[AA:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META8]]
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP2]] to i32
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1
// CHECK-32-EX-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i16
-// CHECK-32-EX-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR]], align 2
+// CHECK-32-EX-NEXT: store i16 [[CONV2]], ptr [[AA_ADDR_ASCAST]], align 2
// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP3]], 1
diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
index 0753573c73bce..3225994928ff5 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp
@@ -100,20 +100,23 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -123,27 +126,32 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[E1:%.*]] = alloca double, align 8
-// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
-// CHECK-64-NEXT: store double 0.000000e+00, ptr [[E1]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[E1:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr
+// CHECK-64-NEXT: [[E1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E1]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
+// CHECK-64-NEXT: store double 0.000000e+00, ptr [[E1_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load double, ptr [[E1_ASCAST]], align 8
// CHECK-64-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00
-// CHECK-64-NEXT: store double [[ADD]], ptr [[E1]], align 8
-// CHECK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK-64-NEXT: store ptr [[E1]], ptr [[TMP2]], align 8
-// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
+// CHECK-64-NEXT: store double [[ADD]], ptr [[E1_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK-64-NEXT: store ptr [[E1_ASCAST]], ptr [[TMP2]], align 8
+// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
// CHECK-64-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 1
// CHECK-64-NEXT: br i1 [[TMP4]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK-64: .omp.reduction.then:
// CHECK-64-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8
-// CHECK-64-NEXT: [[TMP6:%.*]] = load double, ptr [[E1]], align 8
+// CHECK-64-NEXT: [[TMP6:%.*]] = load double, ptr [[E1_ASCAST]], align 8
// CHECK-64-NEXT: [[ADD2:%.*]] = fadd double [[TMP5]], [[TMP6]]
// CHECK-64-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8
// CHECK-64-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
@@ -154,32 +162,38 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
// CHECK-64-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK-64-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8
-// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK-64-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK-64-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK-64-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK-64-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-64-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-64-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-64-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-64-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-64-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i64 1
// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8
// CHECK-64-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK-64-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK-64-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK-64-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
+// CHECK-64-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i64 1
-// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
-// CHECK-64-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
+// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i64 1
+// CHECK-64-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8
// CHECK-64-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK-64-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK-64-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -194,7 +208,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
// CHECK-64-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-64: then:
-// CHECK-64-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3:[0-9]+]]
+// CHECK-64-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR3:[0-9]+]]
// CHECK-64-NEXT: br label [[IFCONT:%.*]]
// CHECK-64: else:
// CHECK-64-NEXT: br label [[IFCONT]]
@@ -204,7 +218,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
// CHECK-64-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
// CHECK-64: then4:
-// CHECK-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8
// CHECK-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8
@@ -220,57 +234,60 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func
// CHECK-64-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-64-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-64-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK-64-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-64-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK-64-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-64-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK-64-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
+// CHECK-64-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK-64-NEXT: br label [[PRECOND:%.*]]
// CHECK-64: precond:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
-// CHECK-64-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2
-// CHECK-64-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 2
+// CHECK-64-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]]
// CHECK-64: body:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK-64-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK-64-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK-64-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-64: then:
-// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK-64-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK-64-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP6]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
+// CHECK-64-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
// CHECK-64-NEXT: br label [[IFCONT:%.*]]
// CHECK-64: else:
// CHECK-64-NEXT: br label [[IFCONT]]
// CHECK-64: ifcont:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK-64-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]]
-// CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK-64-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-64-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]]
+// CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK-64: then3:
-// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK-64-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4
-// CHECK-64-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
-// CHECK-64-NEXT: br label [[IFCONT4:%.*]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
+// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP6]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
+// CHECK-64-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4
+// CHECK-64-NEXT: br label [[IFCONT5:%.*]]
// CHECK-64: else4:
-// CHECK-64-NEXT: br label [[IFCONT4]]
+// CHECK-64-NEXT: br label [[IFCONT5]]
// CHECK-64: ifcont5:
-// CHECK-64-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4
+// CHECK-64-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK-64-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK-64-NEXT: br label [[PRECOND]]
// CHECK-64: exit:
// CHECK-64-NEXT: ret void
@@ -279,25 +296,29 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK-64-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META7]]
+// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9:![0-9]+]]
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
-// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -307,46 +328,53 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[C1:%.*]] = alloca i8, align 1
-// CHECK-64-NEXT: [[D2:%.*]] = alloca float, align 4
-// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8
-// CHECK-64-NEXT: store i8 0, ptr [[C1]], align 1
-// CHECK-64-NEXT: store float 1.000000e+00, ptr [[D2]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[C1]], align 1
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[C1:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-64-NEXT: [[D2:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK-64-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK-64-NEXT: [[C1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C1]] to ptr
+// CHECK-64-NEXT: [[D2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D2]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META7]]
+// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// CHECK-64-NEXT: store i8 0, ptr [[C1_ASCAST]], align 1
+// CHECK-64-NEXT: store float 1.000000e+00, ptr [[D2_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, ptr [[C1_ASCAST]], align 1
// CHECK-64-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32
// CHECK-64-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2
// CHECK-64-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8
-// CHECK-64-NEXT: store i8 [[CONV3]], ptr [[C1]], align 1
-// CHECK-64-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4
+// CHECK-64-NEXT: store i8 [[CONV3]], ptr [[C1_ASCAST]], align 1
+// CHECK-64-NEXT: [[TMP3:%.*]] = load float, ptr [[D2_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01
-// CHECK-64-NEXT: store float [[MUL]], ptr [[D2]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK-64-NEXT: store ptr [[C1]], ptr [[TMP4]], align 8
-// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK-64-NEXT: store ptr [[D2]], ptr [[TMP5]], align 8
-// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2)
+// CHECK-64-NEXT: store float [[MUL]], ptr [[D2_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK-64-NEXT: store ptr [[C1_ASCAST]], ptr [[TMP4]], align 8
+// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[D2_ASCAST]], ptr [[TMP5]], align 8
+// CHECK-64-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2)
// CHECK-64-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
// CHECK-64-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK-64: .omp.reduction.then:
// CHECK-64-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1
// CHECK-64-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i8, ptr [[C1]], align 1
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i8, ptr [[C1_ASCAST]], align 1
// CHECK-64-NEXT: [[CONV5:%.*]] = sext i8 [[TMP9]] to i32
// CHECK-64-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]]
// CHECK-64-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8
// CHECK-64-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1
// CHECK-64-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP1]], align 4
-// CHECK-64-NEXT: [[TMP11:%.*]] = load float, ptr [[D2]], align 4
+// CHECK-64-NEXT: [[TMP11:%.*]] = load float, ptr [[D2_ASCAST]], align 4
// CHECK-64-NEXT: [[MUL8:%.*]] = fmul float [[TMP10]], [[TMP11]]
// CHECK-64-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
@@ -357,24 +385,31 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func1
// CHECK-64-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK-64-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1
-// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4
-// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK-64-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK-64-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK-64-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT4]] to ptr
+// CHECK-64-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-64-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-64-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-64-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-64-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-64-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i64 1
// CHECK-64-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1
// CHECK-64-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32
@@ -382,22 +417,22 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16
// CHECK-64-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]])
// CHECK-64-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
-// CHECK-64-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1
+// CHECK-64-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 1
// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i64 1
-// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
-// CHECK-64-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
+// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i64 1
+// CHECK-64-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8
// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8
-// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i64 1
// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4
// CHECK-64-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK-64-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16
// CHECK-64-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]])
-// CHECK-64-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4
+// CHECK-64-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i64 1
-// CHECK-64-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1
-// CHECK-64-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 8
+// CHECK-64-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], i64 1
+// CHECK-64-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], ptr [[TMP22]], align 8
// CHECK-64-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK-64-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK-64-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -412,7 +447,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
// CHECK-64-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-64: then:
-// CHECK-64-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3]]
+// CHECK-64-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR3]]
// CHECK-64-NEXT: br label [[IFCONT:%.*]]
// CHECK-64: else:
// CHECK-64-NEXT: br label [[IFCONT]]
@@ -422,13 +457,13 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
// CHECK-64-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK-64: then5:
-// CHECK-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 8
// CHECK-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 8
// CHECK-64-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1
// CHECK-64-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1
-// CHECK-64-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8
// CHECK-64-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 8
@@ -444,73 +479,75 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func2
// CHECK-64-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-64-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-64-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK-64-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-64-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK-64-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-64-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK-64-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK-64-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK-64-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK-64-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-64: then:
-// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK-64-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8
-// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
-// CHECK-64-NEXT: store volatile i8 [[TMP10]], ptr addrspace(3) [[TMP9]], align 1
+// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
+// CHECK-64-NEXT: store volatile i8 [[TMP9]], ptr addrspace(3) [[TMP8]], align 1
// CHECK-64-NEXT: br label [[IFCONT:%.*]]
// CHECK-64: else:
// CHECK-64-NEXT: br label [[IFCONT]]
// CHECK-64: ifcont:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK-64-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
-// CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK-64-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-64-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]]
+// CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK-64: then3:
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK-64-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
-// CHECK-64-NEXT: [[TMP15:%.*]] = load volatile i8, ptr addrspace(3) [[TMP12]], align 1
-// CHECK-64-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1
-// CHECK-64-NEXT: br label [[IFCONT4:%.*]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8
+// CHECK-64-NEXT: [[TMP14:%.*]] = load volatile i8, ptr addrspace(3) [[TMP11]], align 1
+// CHECK-64-NEXT: store i8 [[TMP14]], ptr [[TMP13]], align 1
+// CHECK-64-NEXT: br label [[IFCONT5:%.*]]
// CHECK-64: else4:
-// CHECK-64-NEXT: br label [[IFCONT4]]
+// CHECK-64-NEXT: br label [[IFCONT5]]
// CHECK-64: ifcont5:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-64-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK-64-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
+// CHECK-64-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK-64-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK-64-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]]
// CHECK-64: then8:
-// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK-64-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK-64-NEXT: store volatile i32 [[TMP19]], ptr addrspace(3) [[TMP18]], align 4
-// CHECK-64-NEXT: br label [[IFCONT8:%.*]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
+// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4
+// CHECK-64-NEXT: store volatile i32 [[TMP18]], ptr addrspace(3) [[TMP17]], align 4
+// CHECK-64-NEXT: br label [[IFCONT10:%.*]]
// CHECK-64: else9:
-// CHECK-64-NEXT: br label [[IFCONT8]]
+// CHECK-64-NEXT: br label [[IFCONT10]]
// CHECK-64: ifcont10:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK-64-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
-// CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
+// CHECK-64-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-64-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]]
+// CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]]
// CHECK-64: then13:
-// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK-64-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8
-// CHECK-64-NEXT: [[TMP24:%.*]] = load volatile i32, ptr addrspace(3) [[TMP21]], align 4
-// CHECK-64-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4
-// CHECK-64-NEXT: br label [[IFCONT12:%.*]]
+// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8
+// CHECK-64-NEXT: [[TMP23:%.*]] = load volatile i32, ptr addrspace(3) [[TMP20]], align 4
+// CHECK-64-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4
+// CHECK-64-NEXT: br label [[IFCONT15:%.*]]
// CHECK-64: else14:
-// CHECK-64-NEXT: br label [[IFCONT12]]
+// CHECK-64-NEXT: br label [[IFCONT15]]
// CHECK-64: ifcont15:
// CHECK-64-NEXT: ret void
//
@@ -518,25 +555,29 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-64-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-64-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META10:![0-9]+]]
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 8
-// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 8
-// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
+// CHECK-64-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -546,53 +587,60 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined
// CHECK-64-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[A1:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[B2:%.*]] = alloca i16, align 2
-// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[A1]], align 4
-// CHECK-64-NEXT: store i16 -32768, ptr [[B2]], align 2
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[A1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[B2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-64-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-64-NEXT: [[A1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A1]] to ptr
+// CHECK-64-NEXT: [[B2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B2]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META10]]
+// CHECK-64-NEXT: store i32 0, ptr [[A1_ASCAST]], align 4
+// CHECK-64-NEXT: store i16 -32768, ptr [[B2_ASCAST]], align 2
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
// CHECK-64-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1
-// CHECK-64-NEXT: store i32 [[OR]], ptr [[A1]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK-64-NEXT: store i32 [[OR]], ptr [[A1_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK-64-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ]
// CHECK-64-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16
-// CHECK-64-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2
-// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK-64-NEXT: store ptr [[A1]], ptr [[TMP5]], align 8
-// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK-64-NEXT: store ptr [[B2]], ptr [[TMP6]], align 8
-// CHECK-64-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4)
+// CHECK-64-NEXT: store i16 [[CONV4]], ptr [[B2_ASCAST]], align 2
+// CHECK-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK-64-NEXT: store ptr [[A1_ASCAST]], ptr [[TMP5]], align 8
+// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 1
+// CHECK-64-NEXT: store ptr [[B2_ASCAST]], ptr [[TMP6]], align 8
+// CHECK-64-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4)
// CHECK-64-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1
// CHECK-64-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK-64: .omp.reduction.then:
// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
// CHECK-64-NEXT: [[OR5:%.*]] = or i32 [[TMP9]], [[TMP10]]
// CHECK-64-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4
// CHECK-64-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK-64-NEXT: [[CONV6:%.*]] = sext i16 [[TMP11]] to i32
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK-64-NEXT: [[CONV7:%.*]] = sext i16 [[TMP12]] to i32
// CHECK-64-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]]
// CHECK-64-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]]
@@ -600,7 +648,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK-64-NEXT: br label [[COND_END11:%.*]]
// CHECK-64: cond.false10:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK-64-NEXT: br label [[COND_END11]]
// CHECK-64: cond.end11:
// CHECK-64-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP13]], [[COND_TRUE9]] ], [ [[TMP14]], [[COND_FALSE10]] ]
@@ -613,36 +661,43 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3
// CHECK-64-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK-64-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 8
-// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2
-// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK-64-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK-64-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK-64-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_ELEMENT4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT4]] to ptr
+// CHECK-64-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-64-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-64-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-64-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-64-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-64-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK-64-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1
// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
// CHECK-64-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK-64-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK-64-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK-64-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
+// CHECK-64-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1
-// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
-// CHECK-64-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
+// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i64 1
+// CHECK-64-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8
// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
-// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1
// CHECK-64-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2
// CHECK-64-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32
@@ -650,10 +705,10 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
// CHECK-64-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]])
// CHECK-64-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
-// CHECK-64-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
+// CHECK-64-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], align 2
// CHECK-64-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1
-// CHECK-64-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1
-// CHECK-64-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 8
+// CHECK-64-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], i64 1
+// CHECK-64-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], ptr [[TMP20]], align 8
// CHECK-64-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK-64-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK-64-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -668,7 +723,7 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
// CHECK-64-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-64: then:
-// CHECK-64-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3]]
+// CHECK-64-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR3]]
// CHECK-64-NEXT: br label [[IFCONT:%.*]]
// CHECK-64: else:
// CHECK-64-NEXT: br label [[IFCONT]]
@@ -678,13 +733,13 @@ int bar(int n){
// CHECK-64-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
// CHECK-64-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK-64: then5:
-// CHECK-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 8
// CHECK-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK-64-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 8
// CHECK-64-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4
// CHECK-64-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4
-// CHECK-64-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8
// CHECK-64-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
// CHECK-64-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 8
@@ -700,73 +755,75 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4
// CHECK-64-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK-64-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-64-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-64-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK-64-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-64-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK-64-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-64-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK-64-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-64-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK-64-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK-64-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK-64-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK-64-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-64: then:
-// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK-64-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8
-// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK-64-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK-64-NEXT: store volatile i32 [[TMP9]], ptr addrspace(3) [[TMP8]], align 4
// CHECK-64-NEXT: br label [[IFCONT:%.*]]
// CHECK-64: else:
// CHECK-64-NEXT: br label [[IFCONT]]
// CHECK-64: ifcont:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK-64-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
-// CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK-64-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-64-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]]
+// CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK-64: then3:
-// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK-64-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
-// CHECK-64-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4
-// CHECK-64-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
-// CHECK-64-NEXT: br label [[IFCONT4:%.*]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK-64-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8
+// CHECK-64-NEXT: [[TMP14:%.*]] = load volatile i32, ptr addrspace(3) [[TMP11]], align 4
+// CHECK-64-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4
+// CHECK-64-NEXT: br label [[IFCONT5:%.*]]
// CHECK-64: else4:
-// CHECK-64-NEXT: br label [[IFCONT4]]
+// CHECK-64-NEXT: br label [[IFCONT5]]
// CHECK-64: ifcont5:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-64-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK-64-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
+// CHECK-64-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK-64-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK-64-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]]
// CHECK-64: then8:
-// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK-64-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2
-// CHECK-64-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2
-// CHECK-64-NEXT: br label [[IFCONT8:%.*]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
+// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP16]], align 2
+// CHECK-64-NEXT: store volatile i16 [[TMP18]], ptr addrspace(3) [[TMP17]], align 2
+// CHECK-64-NEXT: br label [[IFCONT10:%.*]]
// CHECK-64: else9:
-// CHECK-64-NEXT: br label [[IFCONT8]]
+// CHECK-64-NEXT: br label [[IFCONT10]]
// CHECK-64: ifcont10:
-// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK-64-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
-// CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
+// CHECK-64-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-64-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-64-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]]
+// CHECK-64-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]]
// CHECK-64: then13:
-// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK-64-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8
-// CHECK-64-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2
-// CHECK-64-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
-// CHECK-64-NEXT: br label [[IFCONT12:%.*]]
+// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 1
+// CHECK-64-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8
+// CHECK-64-NEXT: [[TMP23:%.*]] = load volatile i16, ptr addrspace(3) [[TMP20]], align 2
+// CHECK-64-NEXT: store i16 [[TMP23]], ptr [[TMP22]], align 2
+// CHECK-64-NEXT: br label [[IFCONT15:%.*]]
// CHECK-64: else14:
-// CHECK-64-NEXT: br label [[IFCONT12]]
+// CHECK-64-NEXT: br label [[IFCONT15]]
// CHECK-64: ifcont15:
// CHECK-64-NEXT: ret void
//
@@ -774,20 +831,23 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 4, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 1)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -797,27 +857,32 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[E1:%.*]] = alloca double, align 8
-// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
-// CHECK-32-NEXT: store double 0.000000e+00, ptr [[E1]], align 8
-// CHECK-32-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[E1:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr
+// CHECK-32-NEXT: [[E1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E1]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META8]]
+// CHECK-32-NEXT: store double 0.000000e+00, ptr [[E1_ASCAST]], align 8
+// CHECK-32-NEXT: [[TMP1:%.*]] = load double, ptr [[E1_ASCAST]], align 8
// CHECK-32-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00
-// CHECK-32-NEXT: store double [[ADD]], ptr [[E1]], align 8
-// CHECK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK-32-NEXT: store ptr [[E1]], ptr [[TMP2]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
+// CHECK-32-NEXT: store double [[ADD]], ptr [[E1_ASCAST]], align 8
+// CHECK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK-32-NEXT: store ptr [[E1_ASCAST]], ptr [[TMP2]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
// CHECK-32-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 1
// CHECK-32-NEXT: br i1 [[TMP4]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK-32: .omp.reduction.then:
// CHECK-32-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8
-// CHECK-32-NEXT: [[TMP6:%.*]] = load double, ptr [[E1]], align 8
+// CHECK-32-NEXT: [[TMP6:%.*]] = load double, ptr [[E1_ASCAST]], align 8
// CHECK-32-NEXT: [[ADD2:%.*]] = fadd double [[TMP5]], [[TMP6]]
// CHECK-32-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8
// CHECK-32-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
@@ -828,32 +893,38 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
// CHECK-32-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK-32-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 4
-// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8
-// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK-32-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK-32-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK-32-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK-32-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-32-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-32-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-32-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-32-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-32-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i32 1
// CHECK-32-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8
// CHECK-32-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK-32-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK-32-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK-32-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
+// CHECK-32-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 8
// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i32 1
-// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK-32-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i32 1
+// CHECK-32-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 4
// CHECK-32-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK-32-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK-32-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -868,7 +939,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
// CHECK-32-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-32: then:
-// CHECK-32-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3:[0-9]+]]
+// CHECK-32-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR3:[0-9]+]]
// CHECK-32-NEXT: br label [[IFCONT:%.*]]
// CHECK-32: else:
// CHECK-32-NEXT: br label [[IFCONT]]
@@ -878,7 +949,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
// CHECK-32-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
// CHECK-32: then4:
-// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 4
// CHECK-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 4
@@ -894,57 +965,60 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func
// CHECK-32-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-32-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-32-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK-32-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-32-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK-32-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-32-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK-32-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
+// CHECK-32-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: br label [[PRECOND:%.*]]
// CHECK-32: precond:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2
-// CHECK-32-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 2
+// CHECK-32-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]]
// CHECK-32: body:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK-32-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK-32-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK-32-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-32: then:
-// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK-32-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]]
-// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK-32-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP6]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
+// CHECK-32-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
// CHECK-32-NEXT: br label [[IFCONT:%.*]]
// CHECK-32: else:
// CHECK-32-NEXT: br label [[IFCONT]]
// CHECK-32: ifcont:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK-32-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]]
-// CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK-32-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]]
+// CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK-32: then3:
-// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK-32-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]]
-// CHECK-32-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4
-// CHECK-32-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
-// CHECK-32-NEXT: br label [[IFCONT4:%.*]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP6]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
+// CHECK-32-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4
+// CHECK-32-NEXT: br label [[IFCONT5:%.*]]
// CHECK-32: else4:
-// CHECK-32-NEXT: br label [[IFCONT4]]
+// CHECK-32-NEXT: br label [[IFCONT5]]
// CHECK-32: ifcont5:
-// CHECK-32-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK-32-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: br label [[PRECOND]]
// CHECK-32: exit:
// CHECK-32-NEXT: ret void
@@ -953,25 +1027,29 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK-32-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META7]]
+// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META9:![0-9]+]]
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -981,46 +1059,53 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[C1:%.*]] = alloca i8, align 1
-// CHECK-32-NEXT: [[D2:%.*]] = alloca float, align 4
-// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4
-// CHECK-32-NEXT: store i8 0, ptr [[C1]], align 1
-// CHECK-32-NEXT: store float 1.000000e+00, ptr [[D2]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[C1]], align 1
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[C1:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-32-NEXT: [[D2:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK-32-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK-32-NEXT: [[C1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C1]] to ptr
+// CHECK-32-NEXT: [[D2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D2]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META7]]
+// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META9]]
+// CHECK-32-NEXT: store i8 0, ptr [[C1_ASCAST]], align 1
+// CHECK-32-NEXT: store float 1.000000e+00, ptr [[D2_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, ptr [[C1_ASCAST]], align 1
// CHECK-32-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32
// CHECK-32-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2
// CHECK-32-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8
-// CHECK-32-NEXT: store i8 [[CONV3]], ptr [[C1]], align 1
-// CHECK-32-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4
+// CHECK-32-NEXT: store i8 [[CONV3]], ptr [[C1_ASCAST]], align 1
+// CHECK-32-NEXT: [[TMP3:%.*]] = load float, ptr [[D2_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01
-// CHECK-32-NEXT: store float [[MUL]], ptr [[D2]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK-32-NEXT: store ptr [[C1]], ptr [[TMP4]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK-32-NEXT: store ptr [[D2]], ptr [[TMP5]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2)
+// CHECK-32-NEXT: store float [[MUL]], ptr [[D2_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK-32-NEXT: store ptr [[C1_ASCAST]], ptr [[TMP4]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[D2_ASCAST]], ptr [[TMP5]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2)
// CHECK-32-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
// CHECK-32-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK-32: .omp.reduction.then:
// CHECK-32-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1
// CHECK-32-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i8, ptr [[C1]], align 1
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i8, ptr [[C1_ASCAST]], align 1
// CHECK-32-NEXT: [[CONV5:%.*]] = sext i8 [[TMP9]] to i32
// CHECK-32-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]]
// CHECK-32-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8
// CHECK-32-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1
// CHECK-32-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP1]], align 4
-// CHECK-32-NEXT: [[TMP11:%.*]] = load float, ptr [[D2]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load float, ptr [[D2_ASCAST]], align 4
// CHECK-32-NEXT: [[MUL8:%.*]] = fmul float [[TMP10]], [[TMP11]]
// CHECK-32-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
@@ -1031,24 +1116,31 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func1
// CHECK-32-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK-32-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1
-// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4
-// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK-32-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK-32-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK-32-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT4]] to ptr
+// CHECK-32-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-32-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-32-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-32-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-32-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-32-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1
// CHECK-32-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1
// CHECK-32-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32
@@ -1056,22 +1148,22 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16
// CHECK-32-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]])
// CHECK-32-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
-// CHECK-32-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1
+// CHECK-32-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 1
// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1
-// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK-32-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i32 1
+// CHECK-32-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 4
// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 4
-// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i32 1
// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4
// CHECK-32-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK-32-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16
// CHECK-32-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]])
-// CHECK-32-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4
+// CHECK-32-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i32 1
-// CHECK-32-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK-32-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 4
+// CHECK-32-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], i32 1
+// CHECK-32-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], ptr [[TMP22]], align 4
// CHECK-32-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK-32-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK-32-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -1086,7 +1178,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
// CHECK-32-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-32: then:
-// CHECK-32-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3]]
+// CHECK-32-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR3]]
// CHECK-32-NEXT: br label [[IFCONT:%.*]]
// CHECK-32: else:
// CHECK-32-NEXT: br label [[IFCONT]]
@@ -1096,13 +1188,13 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
// CHECK-32-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK-32: then5:
-// CHECK-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
// CHECK-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
// CHECK-32-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1
// CHECK-32-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1
-// CHECK-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
// CHECK-32-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
@@ -1118,73 +1210,75 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func2
// CHECK-32-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-32-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-32-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK-32-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-32-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK-32-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-32-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK-32-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK-32-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK-32-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK-32-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-32: then:
-// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK-32-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
-// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
-// CHECK-32-NEXT: store volatile i8 [[TMP10]], ptr addrspace(3) [[TMP9]], align 1
+// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
+// CHECK-32-NEXT: store volatile i8 [[TMP9]], ptr addrspace(3) [[TMP8]], align 1
// CHECK-32-NEXT: br label [[IFCONT:%.*]]
// CHECK-32: else:
// CHECK-32-NEXT: br label [[IFCONT]]
// CHECK-32: ifcont:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK-32-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
-// CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK-32-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]]
+// CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK-32: then3:
-// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK-32-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load volatile i8, ptr addrspace(3) [[TMP12]], align 1
-// CHECK-32-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1
-// CHECK-32-NEXT: br label [[IFCONT4:%.*]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load volatile i8, ptr addrspace(3) [[TMP11]], align 1
+// CHECK-32-NEXT: store i8 [[TMP14]], ptr [[TMP13]], align 1
+// CHECK-32-NEXT: br label [[IFCONT5:%.*]]
// CHECK-32: else4:
-// CHECK-32-NEXT: br label [[IFCONT4]]
+// CHECK-32-NEXT: br label [[IFCONT5]]
// CHECK-32: ifcont5:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK-32-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
+// CHECK-32-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK-32-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK-32-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]]
// CHECK-32: then8:
-// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK-32-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK-32-NEXT: store volatile i32 [[TMP19]], ptr addrspace(3) [[TMP18]], align 4
-// CHECK-32-NEXT: br label [[IFCONT8:%.*]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4
+// CHECK-32-NEXT: store volatile i32 [[TMP18]], ptr addrspace(3) [[TMP17]], align 4
+// CHECK-32-NEXT: br label [[IFCONT10:%.*]]
// CHECK-32: else9:
-// CHECK-32-NEXT: br label [[IFCONT8]]
+// CHECK-32-NEXT: br label [[IFCONT10]]
// CHECK-32: ifcont10:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK-32-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
-// CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
+// CHECK-32-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]]
+// CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]]
// CHECK-32: then13:
-// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK-32-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
-// CHECK-32-NEXT: [[TMP24:%.*]] = load volatile i32, ptr addrspace(3) [[TMP21]], align 4
-// CHECK-32-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4
-// CHECK-32-NEXT: br label [[IFCONT12:%.*]]
+// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4
+// CHECK-32-NEXT: [[TMP23:%.*]] = load volatile i32, ptr addrspace(3) [[TMP20]], align 4
+// CHECK-32-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4
+// CHECK-32-NEXT: br label [[IFCONT15:%.*]]
// CHECK-32: else14:
-// CHECK-32-NEXT: br label [[IFCONT12]]
+// CHECK-32-NEXT: br label [[IFCONT15]]
// CHECK-32: ifcont15:
// CHECK-32-NEXT: ret void
//
@@ -1192,25 +1286,29 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META9]]
+// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META10:![0-9]+]]
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4
-// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -1220,53 +1318,60 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined
// CHECK-32-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[A1:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[B2:%.*]] = alloca i16, align 2
-// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[A1]], align 4
-// CHECK-32-NEXT: store i16 -32768, ptr [[B2]], align 2
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[A1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[B2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-NEXT: [[A1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A1]] to ptr
+// CHECK-32-NEXT: [[B2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B2]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META9]]
+// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META10]]
+// CHECK-32-NEXT: store i32 0, ptr [[A1_ASCAST]], align 4
+// CHECK-32-NEXT: store i16 -32768, ptr [[B2_ASCAST]], align 2
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
// CHECK-32-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1
-// CHECK-32-NEXT: store i32 [[OR]], ptr [[A1]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK-32-NEXT: store i32 [[OR]], ptr [[A1_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK-32-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ]
// CHECK-32-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16
-// CHECK-32-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2
-// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK-32-NEXT: store ptr [[A1]], ptr [[TMP5]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK-32-NEXT: store ptr [[B2]], ptr [[TMP6]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4)
+// CHECK-32-NEXT: store i16 [[CONV4]], ptr [[B2_ASCAST]], align 2
+// CHECK-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK-32-NEXT: store ptr [[A1_ASCAST]], ptr [[TMP5]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK-32-NEXT: store ptr [[B2_ASCAST]], ptr [[TMP6]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4)
// CHECK-32-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1
// CHECK-32-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK-32: .omp.reduction.then:
// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
// CHECK-32-NEXT: [[OR5:%.*]] = or i32 [[TMP9]], [[TMP10]]
// CHECK-32-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4
// CHECK-32-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK-32-NEXT: [[CONV6:%.*]] = sext i16 [[TMP11]] to i32
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK-32-NEXT: [[CONV7:%.*]] = sext i16 [[TMP12]] to i32
// CHECK-32-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]]
// CHECK-32-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]]
@@ -1274,7 +1379,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK-32-NEXT: br label [[COND_END11:%.*]]
// CHECK-32: cond.false10:
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK-32-NEXT: br label [[COND_END11]]
// CHECK-32: cond.end11:
// CHECK-32-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP13]], [[COND_TRUE9]] ], [ [[TMP14]], [[COND_FALSE10]] ]
@@ -1287,36 +1392,43 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3
// CHECK-32-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK-32-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2
-// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK-32-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK-32-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK-32-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_ELEMENT4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT4]] to ptr
+// CHECK-32-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-32-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-32-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-32-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-32-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-32-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK-32-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
// CHECK-32-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK-32-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK-32-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK-32-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
+// CHECK-32-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
-// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK-32-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i32 1
+// CHECK-32-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 4
// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4
-// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
// CHECK-32-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2
// CHECK-32-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32
@@ -1324,10 +1436,10 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
// CHECK-32-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]])
// CHECK-32-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
-// CHECK-32-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
+// CHECK-32-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], align 2
// CHECK-32-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
-// CHECK-32-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK-32-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4
+// CHECK-32-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], i32 1
+// CHECK-32-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], ptr [[TMP20]], align 4
// CHECK-32-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK-32-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK-32-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -1342,7 +1454,7 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
// CHECK-32-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-32: then:
-// CHECK-32-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3]]
+// CHECK-32-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR3]]
// CHECK-32-NEXT: br label [[IFCONT:%.*]]
// CHECK-32: else:
// CHECK-32-NEXT: br label [[IFCONT]]
@@ -1352,13 +1464,13 @@ int bar(int n){
// CHECK-32-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
// CHECK-32-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK-32: then5:
-// CHECK-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
// CHECK-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK-32-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
// CHECK-32-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4
// CHECK-32-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4
-// CHECK-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
// CHECK-32-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK-32-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
@@ -1374,73 +1486,75 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4
// CHECK-32-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK-32-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-32-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-32-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK-32-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-32-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK-32-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-32-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK-32-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-32-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK-32-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK-32-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK-32-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK-32-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-32: then:
-// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK-32-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
-// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK-32-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK-32-NEXT: store volatile i32 [[TMP9]], ptr addrspace(3) [[TMP8]], align 4
// CHECK-32-NEXT: br label [[IFCONT:%.*]]
// CHECK-32: else:
// CHECK-32-NEXT: br label [[IFCONT]]
// CHECK-32: ifcont:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK-32-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
-// CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK-32-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]]
+// CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK-32: then3:
-// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK-32-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4
-// CHECK-32-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
-// CHECK-32-NEXT: br label [[IFCONT4:%.*]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load volatile i32, ptr addrspace(3) [[TMP11]], align 4
+// CHECK-32-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4
+// CHECK-32-NEXT: br label [[IFCONT5:%.*]]
// CHECK-32: else4:
-// CHECK-32-NEXT: br label [[IFCONT4]]
+// CHECK-32-NEXT: br label [[IFCONT5]]
// CHECK-32: ifcont5:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK-32-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
+// CHECK-32-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK-32-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK-32-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]]
// CHECK-32: then8:
-// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK-32-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2
-// CHECK-32-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2
-// CHECK-32-NEXT: br label [[IFCONT8:%.*]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP16]], align 2
+// CHECK-32-NEXT: store volatile i16 [[TMP18]], ptr addrspace(3) [[TMP17]], align 2
+// CHECK-32-NEXT: br label [[IFCONT10:%.*]]
// CHECK-32: else9:
-// CHECK-32-NEXT: br label [[IFCONT8]]
+// CHECK-32-NEXT: br label [[IFCONT10]]
// CHECK-32: ifcont10:
-// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK-32-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
-// CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
+// CHECK-32-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]]
+// CHECK-32-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]]
// CHECK-32: then13:
-// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK-32-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
-// CHECK-32-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2
-// CHECK-32-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
-// CHECK-32-NEXT: br label [[IFCONT12:%.*]]
+// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4
+// CHECK-32-NEXT: [[TMP23:%.*]] = load volatile i16, ptr addrspace(3) [[TMP20]], align 2
+// CHECK-32-NEXT: store i16 [[TMP23]], ptr [[TMP22]], align 2
+// CHECK-32-NEXT: br label [[IFCONT15:%.*]]
// CHECK-32: else14:
-// CHECK-32-NEXT: br label [[IFCONT12]]
+// CHECK-32-NEXT: br label [[IFCONT15]]
// CHECK-32: ifcont15:
// CHECK-32-NEXT: ret void
//
@@ -1448,20 +1562,23 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 4, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 1)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -1471,27 +1588,32 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[E1:%.*]] = alloca double, align 8
-// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
-// CHECK-32-EX-NEXT: store double 0.000000e+00, ptr [[E1]], align 8
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[E1:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[E1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E1]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META8]]
+// CHECK-32-EX-NEXT: store double 0.000000e+00, ptr [[E1_ASCAST]], align 8
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load double, ptr [[E1_ASCAST]], align 8
// CHECK-32-EX-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00
-// CHECK-32-EX-NEXT: store double [[ADD]], ptr [[E1]], align 8
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK-32-EX-NEXT: store ptr [[E1]], ptr [[TMP2]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
+// CHECK-32-EX-NEXT: store double [[ADD]], ptr [[E1_ASCAST]], align 8
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK-32-EX-NEXT: store ptr [[E1_ASCAST]], ptr [[TMP2]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
// CHECK-32-EX-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 1
// CHECK-32-EX-NEXT: br i1 [[TMP4]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK-32-EX: .omp.reduction.then:
// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load double, ptr [[E1]], align 8
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load double, ptr [[E1_ASCAST]], align 8
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = fadd double [[TMP5]], [[TMP6]]
// CHECK-32-EX-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8
// CHECK-32-EX-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
@@ -1502,32 +1624,38 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
// CHECK-32-EX-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK-32-EX-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK-32-EX-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8
-// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK-32-EX-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK-32-EX-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK-32-EX-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-32-EX-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-32-EX-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i32 1
// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8
// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK-32-EX-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK-32-EX-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
+// CHECK-32-EX-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 8
// CHECK-32-EX-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i32 1
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK-32-EX-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i32 1
+// CHECK-32-EX-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 4
// CHECK-32-EX-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK-32-EX-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -1542,7 +1670,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
// CHECK-32-EX-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-32-EX: then:
-// CHECK-32-EX-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3:[0-9]+]]
+// CHECK-32-EX-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l24_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR3:[0-9]+]]
// CHECK-32-EX-NEXT: br label [[IFCONT:%.*]]
// CHECK-32-EX: else:
// CHECK-32-EX-NEXT: br label [[IFCONT]]
@@ -1552,7 +1680,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
// CHECK-32-EX-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
// CHECK-32-EX: then4:
-// CHECK-32-EX-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 4
// CHECK-32-EX-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 4
@@ -1568,57 +1696,60 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func
// CHECK-32-EX-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-32-EX-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-32-EX-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-32-EX-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[PRECOND:%.*]]
// CHECK-32-EX: precond:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2
-// CHECK-32-EX-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 2
+// CHECK-32-EX-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]]
// CHECK-32-EX: body:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK-32-EX-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK-32-EX-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK-32-EX-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-32-EX: then:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]]
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK-32-EX-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP6]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
+// CHECK-32-EX-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
// CHECK-32-EX-NEXT: br label [[IFCONT:%.*]]
// CHECK-32-EX: else:
// CHECK-32-EX-NEXT: br label [[IFCONT]]
// CHECK-32-EX: ifcont:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]]
-// CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK-32-EX-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]]
+// CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK-32-EX: then3:
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]]
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
-// CHECK-32-EX-NEXT: br label [[IFCONT4:%.*]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP6]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4
+// CHECK-32-EX-NEXT: br label [[IFCONT5:%.*]]
// CHECK-32-EX: else4:
-// CHECK-32-EX-NEXT: br label [[IFCONT4]]
+// CHECK-32-EX-NEXT: br label [[IFCONT5]]
// CHECK-32-EX: ifcont5:
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK-32-EX-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[PRECOND]]
// CHECK-32-EX: exit:
// CHECK-32-EX-NEXT: ret void
@@ -1627,25 +1758,29 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META7]]
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META9:![0-9]+]]
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -1655,46 +1790,53 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[C1:%.*]] = alloca i8, align 1
-// CHECK-32-EX-NEXT: [[D2:%.*]] = alloca float, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i8 0, ptr [[C1]], align 1
-// CHECK-32-EX-NEXT: store float 1.000000e+00, ptr [[D2]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[C1]], align 1
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[C1:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-32-EX-NEXT: [[D2:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[C1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C1]] to ptr
+// CHECK-32-EX-NEXT: [[D2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D2]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META7]]
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META9]]
+// CHECK-32-EX-NEXT: store i8 0, ptr [[C1_ASCAST]], align 1
+// CHECK-32-EX-NEXT: store float 1.000000e+00, ptr [[D2_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i8, ptr [[C1_ASCAST]], align 1
// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32
// CHECK-32-EX-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2
// CHECK-32-EX-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8
-// CHECK-32-EX-NEXT: store i8 [[CONV3]], ptr [[C1]], align 1
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4
+// CHECK-32-EX-NEXT: store i8 [[CONV3]], ptr [[C1_ASCAST]], align 1
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load float, ptr [[D2_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01
-// CHECK-32-EX-NEXT: store float [[MUL]], ptr [[D2]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK-32-EX-NEXT: store ptr [[C1]], ptr [[TMP4]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK-32-EX-NEXT: store ptr [[D2]], ptr [[TMP5]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2)
+// CHECK-32-EX-NEXT: store float [[MUL]], ptr [[D2_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK-32-EX-NEXT: store ptr [[C1_ASCAST]], ptr [[TMP4]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[D2_ASCAST]], ptr [[TMP5]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2)
// CHECK-32-EX-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK-32-EX: .omp.reduction.then:
// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1
// CHECK-32-EX-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i8, ptr [[C1]], align 1
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i8, ptr [[C1_ASCAST]], align 1
// CHECK-32-EX-NEXT: [[CONV5:%.*]] = sext i8 [[TMP9]] to i32
// CHECK-32-EX-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]]
// CHECK-32-EX-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8
// CHECK-32-EX-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1
// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP1]], align 4
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load float, ptr [[D2]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load float, ptr [[D2_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[MUL8:%.*]] = fmul float [[TMP10]], [[TMP11]]
// CHECK-32-EX-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
@@ -1705,24 +1847,31 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func1
// CHECK-32-EX-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK-32-EX-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK-32-EX-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1
-// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4
-// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK-32-EX-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK-32-EX-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK-32-EX-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT4]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-32-EX-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-32-EX-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1
// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1
// CHECK-32-EX-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32
@@ -1730,22 +1879,22 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16
// CHECK-32-EX-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]])
// CHECK-32-EX-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
-// CHECK-32-EX-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1
+// CHECK-32-EX-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 1
// CHECK-32-EX-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK-32-EX-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i32 1
+// CHECK-32-EX-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 4
// CHECK-32-EX-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 4
-// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i32 1
// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4
// CHECK-32-EX-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK-32-EX-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16
// CHECK-32-EX-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]])
-// CHECK-32-EX-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i32 1
-// CHECK-32-EX-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK-32-EX-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 4
+// CHECK-32-EX-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], i32 1
+// CHECK-32-EX-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], ptr [[TMP22]], align 4
// CHECK-32-EX-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK-32-EX-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -1760,7 +1909,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
// CHECK-32-EX-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-32-EX: then:
-// CHECK-32-EX-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3]]
+// CHECK-32-EX-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR3]]
// CHECK-32-EX-NEXT: br label [[IFCONT:%.*]]
// CHECK-32-EX: else:
// CHECK-32-EX-NEXT: br label [[IFCONT]]
@@ -1770,13 +1919,13 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
// CHECK-32-EX-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK-32-EX: then5:
-// CHECK-32-EX-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
// CHECK-32-EX-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
// CHECK-32-EX-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1
// CHECK-32-EX-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1
-// CHECK-32-EX-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
// CHECK-32-EX-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
@@ -1792,73 +1941,75 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func2
// CHECK-32-EX-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-32-EX-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-32-EX-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-32-EX-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK-32-EX-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK-32-EX-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK-32-EX-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-32-EX: then:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
-// CHECK-32-EX-NEXT: store volatile i8 [[TMP10]], ptr addrspace(3) [[TMP9]], align 1
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
+// CHECK-32-EX-NEXT: store volatile i8 [[TMP9]], ptr addrspace(3) [[TMP8]], align 1
// CHECK-32-EX-NEXT: br label [[IFCONT:%.*]]
// CHECK-32-EX: else:
// CHECK-32-EX-NEXT: br label [[IFCONT]]
// CHECK-32-EX: ifcont:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
-// CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK-32-EX-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]]
+// CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK-32-EX: then3:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load volatile i8, ptr addrspace(3) [[TMP12]], align 1
-// CHECK-32-EX-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1
-// CHECK-32-EX-NEXT: br label [[IFCONT4:%.*]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load volatile i8, ptr addrspace(3) [[TMP11]], align 1
+// CHECK-32-EX-NEXT: store i8 [[TMP14]], ptr [[TMP13]], align 1
+// CHECK-32-EX-NEXT: br label [[IFCONT5:%.*]]
// CHECK-32-EX: else4:
-// CHECK-32-EX-NEXT: br label [[IFCONT4]]
+// CHECK-32-EX-NEXT: br label [[IFCONT5]]
// CHECK-32-EX: ifcont5:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-EX-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK-32-EX-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
+// CHECK-32-EX-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK-32-EX-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK-32-EX-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]]
// CHECK-32-EX: then8:
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK-32-EX-NEXT: store volatile i32 [[TMP19]], ptr addrspace(3) [[TMP18]], align 4
-// CHECK-32-EX-NEXT: br label [[IFCONT8:%.*]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4
+// CHECK-32-EX-NEXT: store volatile i32 [[TMP18]], ptr addrspace(3) [[TMP17]], align 4
+// CHECK-32-EX-NEXT: br label [[IFCONT10:%.*]]
// CHECK-32-EX: else9:
-// CHECK-32-EX-NEXT: br label [[IFCONT8]]
+// CHECK-32-EX-NEXT: br label [[IFCONT10]]
// CHECK-32-EX: ifcont10:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
-// CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
+// CHECK-32-EX-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]]
+// CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]]
// CHECK-32-EX: then13:
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
-// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load volatile i32, ptr addrspace(3) [[TMP21]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4
-// CHECK-32-EX-NEXT: br label [[IFCONT12:%.*]]
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4
+// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load volatile i32, ptr addrspace(3) [[TMP20]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4
+// CHECK-32-EX-NEXT: br label [[IFCONT15:%.*]]
// CHECK-32-EX: else14:
-// CHECK-32-EX-NEXT: br label [[IFCONT12]]
+// CHECK-32-EX-NEXT: br label [[IFCONT15]]
// CHECK-32-EX: ifcont15:
// CHECK-32-EX-NEXT: ret void
//
@@ -1866,25 +2017,29 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META9]]
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META10:![0-9]+]]
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[TMP4]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: store ptr [[TMP1]], ptr [[TMP5]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
+// CHECK-32-EX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -1894,53 +2049,60 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[A1:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[B2:%.*]] = alloca i16, align 2
-// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[A1]], align 4
-// CHECK-32-EX-NEXT: store i16 -32768, ptr [[B2]], align 2
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[A1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[B2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[A1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A1]] to ptr
+// CHECK-32-EX-NEXT: [[B2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B2]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META9]]
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META10]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[A1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i16 -32768, ptr [[B2_ASCAST]], align 2
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1
-// CHECK-32-EX-NEXT: store i32 [[OR]], ptr [[A1]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK-32-EX-NEXT: store i32 [[OR]], ptr [[A1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK-32-EX-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ]
// CHECK-32-EX-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16
-// CHECK-32-EX-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK-32-EX-NEXT: store ptr [[A1]], ptr [[TMP5]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK-32-EX-NEXT: store ptr [[B2]], ptr [[TMP6]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4)
+// CHECK-32-EX-NEXT: store i16 [[CONV4]], ptr [[B2_ASCAST]], align 2
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK-32-EX-NEXT: store ptr [[A1_ASCAST]], ptr [[TMP5]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK-32-EX-NEXT: store ptr [[B2_ASCAST]], ptr [[TMP6]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func3, ptr @_omp_reduction_inter_warp_copy_func4)
// CHECK-32-EX-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK-32-EX: .omp.reduction.then:
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[OR5:%.*]] = or i32 [[TMP9]], [[TMP10]]
// CHECK-32-EX-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK-32-EX-NEXT: [[CONV6:%.*]] = sext i16 [[TMP11]] to i32
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK-32-EX-NEXT: [[CONV7:%.*]] = sext i16 [[TMP12]] to i32
// CHECK-32-EX-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]]
// CHECK-32-EX-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]]
@@ -1948,7 +2110,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK-32-EX-NEXT: br label [[COND_END11:%.*]]
// CHECK-32-EX: cond.false10:
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK-32-EX-NEXT: br label [[COND_END11]]
// CHECK-32-EX: cond.end11:
// CHECK-32-EX-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP13]], [[COND_TRUE9]] ], [ [[TMP14]], [[COND_FALSE10]] ]
@@ -1961,36 +2123,43 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3
// CHECK-32-EX-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK-32-EX-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK-32-EX-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2
-// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK-32-EX-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK-32-EX-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK-32-EX-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_ELEMENT4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT4]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-32-EX-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-32-EX-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
// CHECK-32-EX-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK-32-EX-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK-32-EX-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK-32-EX-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK-32-EX-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i32 1
+// CHECK-32-EX-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 4
// CHECK-32-EX-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2
// CHECK-32-EX-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32
@@ -1998,10 +2167,10 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
// CHECK-32-EX-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]])
// CHECK-32-EX-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
-// CHECK-32-EX-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
+// CHECK-32-EX-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], align 2
// CHECK-32-EX-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
-// CHECK-32-EX-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK-32-EX-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4
+// CHECK-32-EX-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], i32 1
+// CHECK-32-EX-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], ptr [[TMP20]], align 4
// CHECK-32-EX-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK-32-EX-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -2016,7 +2185,7 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
// CHECK-32-EX-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-32-EX: then:
-// CHECK-32-EX-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3]]
+// CHECK-32-EX-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l35_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR3]]
// CHECK-32-EX-NEXT: br label [[IFCONT:%.*]]
// CHECK-32-EX: else:
// CHECK-32-EX-NEXT: br label [[IFCONT]]
@@ -2026,13 +2195,13 @@ int bar(int n){
// CHECK-32-EX-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
// CHECK-32-EX-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK-32-EX: then5:
-// CHECK-32-EX-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
// CHECK-32-EX-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK-32-EX-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
// CHECK-32-EX-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4
// CHECK-32-EX-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4
-// CHECK-32-EX-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
// CHECK-32-EX-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK-32-EX-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
@@ -2048,73 +2217,75 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4
// CHECK-32-EX-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK-32-EX-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK-32-EX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-32-EX-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK-32-EX-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-32-EX-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK-32-EX-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK-32-EX-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK-32-EX-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK-32-EX-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK-32-EX: then:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK-32-EX-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK-32-EX-NEXT: store volatile i32 [[TMP9]], ptr addrspace(3) [[TMP8]], align 4
// CHECK-32-EX-NEXT: br label [[IFCONT:%.*]]
// CHECK-32-EX: else:
// CHECK-32-EX-NEXT: br label [[IFCONT]]
// CHECK-32-EX: ifcont:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
-// CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK-32-EX-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]]
+// CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK-32-EX: then3:
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
-// CHECK-32-EX-NEXT: br label [[IFCONT4:%.*]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load volatile i32, ptr addrspace(3) [[TMP11]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4
+// CHECK-32-EX-NEXT: br label [[IFCONT5:%.*]]
// CHECK-32-EX: else4:
-// CHECK-32-EX-NEXT: br label [[IFCONT4]]
+// CHECK-32-EX-NEXT: br label [[IFCONT5]]
// CHECK-32-EX: ifcont5:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-EX-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK-32-EX-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
+// CHECK-32-EX-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK-32-EX-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK-32-EX-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]]
// CHECK-32-EX: then8:
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2
-// CHECK-32-EX-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2
-// CHECK-32-EX-NEXT: br label [[IFCONT8:%.*]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP16]], align 2
+// CHECK-32-EX-NEXT: store volatile i16 [[TMP18]], ptr addrspace(3) [[TMP17]], align 2
+// CHECK-32-EX-NEXT: br label [[IFCONT10:%.*]]
// CHECK-32-EX: else9:
-// CHECK-32-EX-NEXT: br label [[IFCONT8]]
+// CHECK-32-EX-NEXT: br label [[IFCONT10]]
// CHECK-32-EX: ifcont10:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
-// CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
+// CHECK-32-EX-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-32-EX-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]]
+// CHECK-32-EX-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]]
// CHECK-32-EX: then13:
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK-32-EX-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
-// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2
-// CHECK-32-EX-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
-// CHECK-32-EX-NEXT: br label [[IFCONT12:%.*]]
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4
+// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load volatile i16, ptr addrspace(3) [[TMP20]], align 2
+// CHECK-32-EX-NEXT: store i16 [[TMP23]], ptr [[TMP22]], align 2
+// CHECK-32-EX-NEXT: br label [[IFCONT15:%.*]]
// CHECK-32-EX: else14:
-// CHECK-32-EX-NEXT: br label [[IFCONT12]]
+// CHECK-32-EX-NEXT: br label [[IFCONT15]]
// CHECK-32-EX: ifcont15:
// CHECK-32-EX-NEXT: ret void
//
diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
index 20e344f0a34a0..92dd5cd4102f6 100644
--- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
+++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp
@@ -33,18 +33,21 @@ void test() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16
// CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8, !tbaa [[TBAA10:![0-9]+]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8, !tbaa [[TBAA6:![0-9]+]]
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA15:![0-9]+]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4, !tbaa [[TBAA10:![0-9]+]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -54,101 +57,113 @@ void test() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined
// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[IB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4
-// CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca float, align 4
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA17:![0-9]+]]
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA17]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[IB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[IB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IB]] to ptr
+// CHECK1-NEXT: [[REF_TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP]] to ptr
+// CHECK1-NEXT: [[REF_TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP2]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !tbaa [[TBAA12:![0-9]+]]
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[ISTART:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: [[IEND:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 8)
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[IB]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_IV]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_LB]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_UB]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_STRIDE]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_IS_LAST]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[IB]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
// CHECK1: omp.inner.for.cond.cleanup:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP]]) #[[ATTR4]]
-// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[TBAA19:![0-9]+]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP2]]) #[[ATTR4]]
-// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP2]], align 4, !tbaa [[TBAA19]]
-// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR11:[0-9]+]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP2]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[REF_TMP]]) #[[ATTR4]]
+// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP_ASCAST]], align 4, !tbaa [[TBAA14:![0-9]+]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[REF_TMP2]]) #[[ATTR4]]
+// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP2_ASCAST]], align 4, !tbaa [[TBAA14]]
+// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP_ASCAST]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP2_ASCAST]]) #[[ATTR11:[0-9]+]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[REF_TMP2]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[REF_TMP]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP8]], 4
-// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4
-// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[TBAA21:![0-9]+]]
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP11]], align 8, !tbaa [[TBAA21]]
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
-// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP12]], align 8, !tbaa [[TBAA21]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[TBAA16:![0-9]+]]
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP11]], align 8, !tbaa [[TBAA16]]
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP12]], align 8, !tbaa [[TBAA16]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[IB]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[IB]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_IS_LAST]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_STRIDE]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_UB]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_LB]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_IV]]) #[[ATTR4]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[PARTIAL_SUM]], i64 8)
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[IEND]], i64 4)
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ISTART]], i64 4)
@@ -158,15 +173,18 @@ void test() {
// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC1ERKfS2_
// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr nonnull align 4 dereferenceable(4) [[__RE:%.*]], ptr nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR5:[0-9]+]] comdat align 2 {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA23:![0-9]+]]
-// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA25:![0-9]+]]
-// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA25]]
-// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK1-NEXT: [[__RE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__RE_ADDR]] to ptr
+// CHECK1-NEXT: [[__IM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__IM_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8, !tbaa [[TBAA18:![0-9]+]]
+// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR_ASCAST]], align 8, !tbaa [[TBAA20:![0-9]+]]
+// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR_ASCAST]], align 8, !tbaa [[TBAA20]]
+// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__IM_ADDR_ASCAST]], align 8
// CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[THIS1]], ptr nonnull align 4 dereferenceable(4) [[TMP0]], ptr nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR11]]
// CHECK1-NEXT: ret void
//
@@ -174,102 +192,125 @@ void test() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[ISTART:%.*]], ptr nonnull align 4 dereferenceable(4) [[IEND:%.*]], ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[IEND_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex", align 4
-// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4
-// CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca float, align 4
-// CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex", align 4
-// CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca float, align 4
-// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca float, align 4
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA23]]
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA23]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[IEND_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex", align 4, addrspace(5)
+// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex", align 4, addrspace(5)
+// CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[ISTART_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ISTART_ADDR]] to ptr
+// CHECK1-NEXT: [[IEND_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IEND_ADDR]] to ptr
+// CHECK1-NEXT: [[PARTIAL_SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PARTIAL_SUM_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[PARTIAL_SUM5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PARTIAL_SUM5]] to ptr
+// CHECK1-NEXT: [[REF_TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP]] to ptr
+// CHECK1-NEXT: [[REF_TMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP6]] to ptr
+// CHECK1-NEXT: [[I7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I7]] to ptr
+// CHECK1-NEXT: [[REF_TMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP14]] to ptr
+// CHECK1-NEXT: [[REF_TMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP15]] to ptr
+// CHECK1-NEXT: [[REF_TMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP16]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !tbaa [[TBAA12]]
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8, !tbaa [[TBAA12]]
+// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR_ASCAST]], align 8, !tbaa [[TBAA12]]
+// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR_ASCAST]], align 8, !tbaa [[TBAA12]]
+// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR_ASCAST]], align 8, !tbaa [[TBAA18]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ISTART_ADDR_ASCAST]], align 8, !tbaa [[TBAA12]], !nonnull [[META22:![0-9]+]], !align [[META23:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR_ASCAST]], align 8, !tbaa [[TBAA12]], !nonnull [[META22]], !align [[META23]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR_ASCAST]], align 8, !tbaa [[TBAA18]], !nonnull [[META22]], !align [[META23]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_IV]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTCAPTURE_EXPR_]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1
// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1
// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[I]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[I]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[PARTIAL_SUM5]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP]]) #[[ATTR4]]
-// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP]], align 4, !tbaa [[TBAA19]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP6]]) #[[ATTR4]]
-// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP6]], align 4, !tbaa [[TBAA19]]
-// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR11]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP6]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[I7]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_LB]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_UB]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_STRIDE]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_IS_LAST]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[PARTIAL_SUM5]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[REF_TMP]]) #[[ATTR4]]
+// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP_ASCAST]], align 4, !tbaa [[TBAA14]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[REF_TMP6]]) #[[ATTR4]]
+// CHECK1-NEXT: store float 0.000000e+00, ptr [[REF_TMP6_ASCAST]], align 4, !tbaa [[TBAA14]]
+// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5_ASCAST]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP_ASCAST]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP6_ASCAST]]) #[[ATTR11]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[REF_TMP6]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[REF_TMP]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[I7]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3:[0-9]+]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK1: omp.dispatch.cond:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]]
// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP19]], 1
// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP18]], [[ADD9]]
// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]]
@@ -278,133 +319,143 @@ void test() {
// CHECK1: omp.dispatch.body:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP21]], 1
// CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP20]], [[ADD11]]
// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
// CHECK1: omp.inner.for.cond.cleanup:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP23]], 1
// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP14]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP15]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[REF_TMP14]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[REF_TMP15]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP24]] to float
-// CHECK1-NEXT: store float [[CONV]], ptr [[REF_TMP15]], align 4, !tbaa [[TBAA19]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP16]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store float [[CONV]], ptr [[REF_TMP15_ASCAST]], align 4, !tbaa [[TBAA14]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[REF_TMP16]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP25]] to float
-// CHECK1-NEXT: store float [[CONV17]], ptr [[REF_TMP16]], align 4, !tbaa [[TBAA19]]
-// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[REF_TMP14]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP15]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR11]]
-// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR11]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP16]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP15]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP14]]) #[[ATTR4]]
+// CHECK1-NEXT: store float [[CONV17]], ptr [[REF_TMP16_ASCAST]], align 4, !tbaa [[TBAA14]]
+// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(ptr nonnull align 4 dereferenceable(8) [[REF_TMP14_ASCAST]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP15_ASCAST]], ptr nonnull align 4 dereferenceable(4) [[REF_TMP16_ASCAST]]) #[[ATTR11]]
+// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5_ASCAST]], ptr nonnull align 4 dereferenceable(8) [[REF_TMP14_ASCAST]]) #[[ATTR11]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[REF_TMP16]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[REF_TMP15]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[REF_TMP14]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP26]], 1
-// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK1: omp.dispatch.inc:
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP27]], [[TMP28]]
-// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP29]], [[TMP30]]
-// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP32]])
-// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[PARTIAL_SUM5]], ptr [[TMP33]], align 8
-// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
+// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[PARTIAL_SUM5_ASCAST]], ptr [[TMP33]], align 8
+// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
// CHECK1-NEXT: [[TMP35:%.*]] = icmp eq i32 [[TMP34]], 1
// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK1: .omp.reduction.then:
-// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[TMP2]], ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR11]]
+// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) ptr @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(ptr nonnull align 4 dereferenceable(8) [[TMP2]], ptr nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5_ASCAST]]) #[[ATTR11]]
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK1: .omp.reduction.done:
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[I7]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[PARTIAL_SUM5]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[I7]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[PARTIAL_SUM5]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_IS_LAST]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_STRIDE]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_UB]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_LB]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTCAPTURE_EXPR_]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_IV]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEpLIfEERS0_RKS_IT_E
// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr nonnull align 4 dereferenceable(8) [[__C:%.*]]) #[[ATTR5]] comdat align 2 {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA23]]
-// CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR]], align 8, !tbaa [[TBAA23]]
-// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA23]]
+// CHECK1-NEXT: [[RETVAL:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK1-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK1-NEXT: [[__C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__C_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8, !tbaa [[TBAA18]]
+// CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR_ASCAST]], align 8, !tbaa [[TBAA18]]
+// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR_ASCAST]], align 8, !tbaa [[TBAA18]], !nonnull [[META22]], !align [[META23]]
// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(ptr nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR11]]
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[__RE_]], align 4, !tbaa [[TBAA27:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[__RE_]], align 4, !tbaa [[TBAA24:![0-9]+]]
// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]]
-// CHECK1-NEXT: store float [[ADD]], ptr [[__RE_]], align 4, !tbaa [[TBAA27]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA23]]
+// CHECK1-NEXT: store float [[ADD]], ptr [[__RE_]], align 4, !tbaa [[TBAA24]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__C_ADDR_ASCAST]], align 8, !tbaa [[TBAA18]], !nonnull [[META22]], !align [[META23]]
// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(ptr nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR11]]
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[__IM_]], align 4, !tbaa [[TBAA29:![0-9]+]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[__IM_]], align 4, !tbaa [[TBAA26:![0-9]+]]
// CHECK1-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]]
-// CHECK1-NEXT: store float [[ADD3]], ptr [[__IM_]], align 4, !tbaa [[TBAA29]]
+// CHECK1-NEXT: store float [[ADD3]], ptr [[__IM_]], align 4, !tbaa [[TBAA26]]
// CHECK1-NEXT: ret ptr [[THIS1]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex", align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex", align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr %"class.std::complex", ptr [[TMP9]], i64 1
// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK1-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK1-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
+// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i64 1
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i64 1
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK1-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK1-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -419,7 +470,7 @@ void test() {
// CHECK1-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
// CHECK1-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
-// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
@@ -429,7 +480,7 @@ void test() {
// CHECK1-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
// CHECK1-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
// CHECK1: then4:
-// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8
// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8
@@ -444,21 +495,24 @@ void test() {
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK1-NEXT: br label [[PRECOND:%.*]]
// CHECK1: precond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK1-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 2
// CHECK1-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]]
// CHECK1: body:
@@ -479,7 +533,7 @@ void test() {
// CHECK1: ifcont:
// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]]
// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK1: then3:
@@ -494,7 +548,7 @@ void test() {
// CHECK1-NEXT: br label [[IFCONT5]]
// CHECK1: ifcont5:
// CHECK1-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK1-NEXT: br label [[PRECOND]]
// CHECK1: exit:
// CHECK1-NEXT: ret void
@@ -503,40 +557,47 @@ void test() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined_wrapper
// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, !tbaa [[TBAA30:![0-9]+]]
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2, !tbaa [[TBAA27:![0-9]+]]
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA32:![0-9]+]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA6]]
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA32]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA6]]
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 2
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[TBAA34:![0-9]+]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]], ptr [[TMP8]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[TBAA6]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16_omp_outlined_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]], ptr [[TMP6]], ptr [[TMP8]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16
// CHECK1-SAME: (ptr noalias [[DYN_PTR:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8, !tbaa [[TBAA6]]
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR4]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -546,101 +607,113 @@ void test() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined
// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[IB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8
-// CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca double, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA17]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[IB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[IB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IB]] to ptr
+// CHECK1-NEXT: [[REF_TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP]] to ptr
+// CHECK1-NEXT: [[REF_TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP2]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !tbaa [[TBAA12]]
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[ISTART:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: [[IEND:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 ptr @__kmpc_alloc_shared(i64 16)
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[IB]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_IV]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_LB]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_UB]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_STRIDE]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_IS_LAST]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[IB]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 99
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
// CHECK1: omp.inner.for.cond.cleanup:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP]]) #[[ATTR4]]
-// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa [[TBAA36:![0-9]+]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP2]]) #[[ATTR4]]
-// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP2]], align 8, !tbaa [[TBAA36]]
-// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR11]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP2]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[IB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[REF_TMP]]) #[[ATTR4]]
+// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP_ASCAST]], align 8, !tbaa [[TBAA29:![0-9]+]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[REF_TMP2]]) #[[ATTR4]]
+// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP2_ASCAST]], align 8, !tbaa [[TBAA29]]
+// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP_ASCAST]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP2_ASCAST]]) #[[ATTR11]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[REF_TMP2]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[REF_TMP]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[IB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP8]], 4
-// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[MUL3]], ptr [[ISTART]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[IB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4
-// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[TBAA21]]
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP11]], align 8, !tbaa [[TBAA21]]
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
-// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP12]], align 8, !tbaa [[TBAA21]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK1-NEXT: store i32 [[MUL5]], ptr [[IEND]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[ISTART]], ptr [[TMP10]], align 8, !tbaa [[TBAA16]]
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[IEND]], ptr [[TMP11]], align 8, !tbaa [[TBAA16]]
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[TMP12]], align 8, !tbaa [[TBAA16]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP1]])
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[IB]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[IB]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_IS_LAST]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_STRIDE]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_UB]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_LB]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_IV]]) #[[ATTR4]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[PARTIAL_SUM]], i64 16)
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[IEND]], i64 4)
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ISTART]], i64 4)
@@ -650,15 +723,18 @@ void test() {
// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC1ERKdS2_
// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr nonnull align 8 dereferenceable(8) [[__RE:%.*]], ptr nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA38:![0-9]+]]
-// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA40:![0-9]+]]
-// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA40]]
-// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK1-NEXT: [[__RE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__RE_ADDR]] to ptr
+// CHECK1-NEXT: [[__IM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__IM_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8, !tbaa [[TBAA31:![0-9]+]]
+// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR_ASCAST]], align 8, !tbaa [[TBAA33:![0-9]+]]
+// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR_ASCAST]], align 8, !tbaa [[TBAA33]]
+// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__IM_ADDR_ASCAST]], align 8
// CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[THIS1]], ptr nonnull align 8 dereferenceable(8) [[TMP0]], ptr nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR11]]
// CHECK1-NEXT: ret void
//
@@ -666,102 +742,125 @@ void test() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[ISTART:%.*]], ptr nonnull align 4 dereferenceable(4) [[IEND:%.*]], ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[IEND_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex.0", align 8
-// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8
-// CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca double, align 8
-// CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex.0", align 8
-// CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca double, align 8
-// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca double, align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA38]]
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ISTART_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR]], align 8, !tbaa [[TBAA17]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA38]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[IEND_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex.0", align 8, addrspace(5)
+// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex.0", align 8, addrspace(5)
+// CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[ISTART_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ISTART_ADDR]] to ptr
+// CHECK1-NEXT: [[IEND_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IEND_ADDR]] to ptr
+// CHECK1-NEXT: [[PARTIAL_SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PARTIAL_SUM_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[PARTIAL_SUM5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[PARTIAL_SUM5]] to ptr
+// CHECK1-NEXT: [[REF_TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP]] to ptr
+// CHECK1-NEXT: [[REF_TMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP6]] to ptr
+// CHECK1-NEXT: [[I7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I7]] to ptr
+// CHECK1-NEXT: [[REF_TMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP14]] to ptr
+// CHECK1-NEXT: [[REF_TMP15_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP15]] to ptr
+// CHECK1-NEXT: [[REF_TMP16_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[REF_TMP16]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !tbaa [[TBAA12]]
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8, !tbaa [[TBAA12]]
+// CHECK1-NEXT: store ptr [[ISTART]], ptr [[ISTART_ADDR_ASCAST]], align 8, !tbaa [[TBAA12]]
+// CHECK1-NEXT: store ptr [[IEND]], ptr [[IEND_ADDR_ASCAST]], align 8, !tbaa [[TBAA12]]
+// CHECK1-NEXT: store ptr [[PARTIAL_SUM]], ptr [[PARTIAL_SUM_ADDR_ASCAST]], align 8, !tbaa [[TBAA31]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ISTART_ADDR_ASCAST]], align 8, !tbaa [[TBAA12]], !nonnull [[META22]], !align [[META23]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[IEND_ADDR_ASCAST]], align 8, !tbaa [[TBAA12]], !nonnull [[META22]], !align [[META23]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PARTIAL_SUM_ADDR_ASCAST]], align 8, !tbaa [[TBAA31]], !nonnull [[META22]], !align [[META35:![0-9]+]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_IV]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTCAPTURE_EXPR_]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1
// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1
// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[I]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[I]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[I]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP7]], ptr [[I_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[I]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP8]], [[TMP9]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[PARTIAL_SUM5]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP]]) #[[ATTR4]]
-// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP]], align 8, !tbaa [[TBAA36]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP6]]) #[[ATTR4]]
-// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP6]], align 8, !tbaa [[TBAA36]]
-// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR11]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP6]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[I7]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_LB]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_UB]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_STRIDE]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[DOTOMP_IS_LAST]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[PARTIAL_SUM5]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[REF_TMP]]) #[[ATTR4]]
+// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP_ASCAST]], align 8, !tbaa [[TBAA29]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[REF_TMP6]]) #[[ATTR4]]
+// CHECK1-NEXT: store double 0.000000e+00, ptr [[REF_TMP6_ASCAST]], align 8, !tbaa [[TBAA29]]
+// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5_ASCAST]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP_ASCAST]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP6_ASCAST]]) #[[ATTR11]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[REF_TMP6]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[REF_TMP]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[I7]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB3]], i32 [[TMP12]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK1: omp.dispatch.cond:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]]
// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: store i32 [[TMP17]], ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP19]], 1
// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP18]], [[ADD9]]
// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]]
@@ -770,129 +869,139 @@ void test() {
// CHECK1: omp.dispatch.body:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP21]], 1
// CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP20]], [[ADD11]]
// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
// CHECK1: omp.inner.for.cond.cleanup:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP23]], 1
// CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP14]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP15]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD13]], ptr [[I7_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[REF_TMP14]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[REF_TMP15]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[I7_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP24]] to double
-// CHECK1-NEXT: store double [[CONV]], ptr [[REF_TMP15]], align 8, !tbaa [[TBAA36]]
-// CHECK1-NEXT: call void @llvm.lifetime.start.p0(ptr [[REF_TMP16]]) #[[ATTR4]]
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store double [[CONV]], ptr [[REF_TMP15_ASCAST]], align 8, !tbaa [[TBAA29]]
+// CHECK1-NEXT: call void @llvm.lifetime.start.p5(ptr addrspace(5) [[REF_TMP16]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[I7_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP25]] to double
-// CHECK1-NEXT: store double [[CONV17]], ptr [[REF_TMP16]], align 8, !tbaa [[TBAA36]]
-// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[REF_TMP14]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP15]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR11]]
-// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], ptr nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR11]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP16]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP15]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[REF_TMP14]]) #[[ATTR4]]
+// CHECK1-NEXT: store double [[CONV17]], ptr [[REF_TMP16_ASCAST]], align 8, !tbaa [[TBAA29]]
+// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(ptr nonnull align 8 dereferenceable(16) [[REF_TMP14_ASCAST]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP15_ASCAST]], ptr nonnull align 8 dereferenceable(8) [[REF_TMP16_ASCAST]]) #[[ATTR11]]
+// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5_ASCAST]], ptr nonnull align 8 dereferenceable(16) [[REF_TMP14_ASCAST]]) #[[ATTR11]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[REF_TMP16]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[REF_TMP15]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[REF_TMP14]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP26]], 1
-// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD18]], ptr [[DOTOMP_IV_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK1: omp.dispatch.inc:
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP27]], [[TMP28]]
-// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD19]], ptr [[DOTOMP_LB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP29]], [[TMP30]]
-// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: store i32 [[ADD20]], ptr [[DOTOMP_UB_ASCAST]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[TBAA15]]
+// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !tbaa [[TBAA10]]
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP32]])
-// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[PARTIAL_SUM5]], ptr [[TMP33]], align 8
-// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2)
+// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[PARTIAL_SUM5_ASCAST]], ptr [[TMP33]], align 8
+// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 16, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2)
// CHECK1-NEXT: [[TMP35:%.*]] = icmp eq i32 [[TMP34]], 1
// CHECK1-NEXT: br i1 [[TMP35]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK1: .omp.reduction.then:
-// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr nonnull align 8 dereferenceable(16) [[TMP2]], ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR11]]
+// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) ptr @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(ptr nonnull align 8 dereferenceable(16) [[TMP2]], ptr nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5_ASCAST]]) #[[ATTR11]]
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK1: .omp.reduction.done:
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[I7]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[PARTIAL_SUM5]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_IS_LAST]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_STRIDE]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_UB]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_LB]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[I7]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[PARTIAL_SUM5]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_IS_LAST]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_STRIDE]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_UB]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_LB]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.end:
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTCAPTURE_EXPR_]]) #[[ATTR4]]
-// CHECK1-NEXT: call void @llvm.lifetime.end.p0(ptr [[DOTOMP_IV]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTCAPTURE_EXPR_2]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTCAPTURE_EXPR_1]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTCAPTURE_EXPR_]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @llvm.lifetime.end.p5(ptr addrspace(5) [[DOTOMP_IV]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEpLIdEERS0_RKS_IT_E
// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR5]] comdat align 2 {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA38]]
-// CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR]], align 8, !tbaa [[TBAA38]]
-// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA38]]
+// CHECK1-NEXT: [[RETVAL:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK1-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK1-NEXT: [[__C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__C_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8, !tbaa [[TBAA31]]
+// CHECK1-NEXT: store ptr [[__C]], ptr [[__C_ADDR_ASCAST]], align 8, !tbaa [[TBAA31]]
+// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__C_ADDR_ASCAST]], align 8, !tbaa [[TBAA31]], !nonnull [[META22]], !align [[META35]]
// CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(ptr nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR11]]
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[TBAA42:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[TBAA36:![0-9]+]]
// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]]
-// CHECK1-NEXT: store double [[ADD]], ptr [[__RE_]], align 8, !tbaa [[TBAA42]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__C_ADDR]], align 8, !tbaa [[TBAA38]]
+// CHECK1-NEXT: store double [[ADD]], ptr [[__RE_]], align 8, !tbaa [[TBAA36]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__C_ADDR_ASCAST]], align 8, !tbaa [[TBAA31]], !nonnull [[META22]], !align [[META35]]
// CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(ptr nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR11]]
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa [[TBAA44:![0-9]+]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa [[TBAA38:![0-9]+]]
// CHECK1-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]]
-// CHECK1-NEXT: store double [[ADD3]], ptr [[__IM_]], align 8, !tbaa [[TBAA44]]
+// CHECK1-NEXT: store double [[ADD3]], ptr [[__IM_]], align 8, !tbaa [[TBAA38]]
// CHECK1-NEXT: ret ptr [[THIS1]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func1
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.0", align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.0", align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr %"class.std::complex.0", ptr [[TMP9]], i64 1
// CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]]
// CHECK1: .shuffle.pre_cond:
// CHECK1-NEXT: [[TMP12:%.*]] = phi ptr [ [[TMP9]], [[ENTRY:%.*]] ], [ [[TMP23:%.*]], [[DOTSHUFFLE_THEN:%.*]] ]
-// CHECK1-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ]
+// CHECK1-NEXT: [[TMP13:%.*]] = phi ptr [ [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], [[ENTRY]] ], [ [[TMP24:%.*]], [[DOTSHUFFLE_THEN]] ]
// CHECK1-NEXT: [[TMP14:%.*]] = ptrtoint ptr [[TMP11]] to i64
// CHECK1-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP12]] to i64
// CHECK1-NEXT: [[TMP16:%.*]] = sub i64 [[TMP14]], [[TMP15]]
@@ -909,7 +1018,7 @@ void test() {
// CHECK1-NEXT: [[TMP24]] = getelementptr i64, ptr [[TMP13]], i64 1
// CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND]]
// CHECK1: .shuffle.exit:
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8
// CHECK1-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK1-NEXT: [[TMP26:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK1-NEXT: [[TMP27:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -924,7 +1033,7 @@ void test() {
// CHECK1-NEXT: [[TMP36:%.*]] = or i1 [[TMP35]], [[TMP34]]
// CHECK1-NEXT: br i1 [[TMP36]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
-// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
@@ -934,7 +1043,7 @@ void test() {
// CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]]
// CHECK1-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
// CHECK1: then4:
-// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8
// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK1-NEXT: [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8
@@ -949,21 +1058,24 @@ void test() {
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func2
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK1-NEXT: br label [[PRECOND:%.*]]
// CHECK1: precond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK1-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 4
// CHECK1-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]]
// CHECK1: body:
@@ -984,7 +1096,7 @@ void test() {
// CHECK1: ifcont:
// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]]
// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK1: then3:
@@ -999,7 +1111,7 @@ void test() {
// CHECK1-NEXT: br label [[IFCONT5]]
// CHECK1: ifcont5:
// CHECK1-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK1-NEXT: br label [[PRECOND]]
// CHECK1: exit:
// CHECK1-NEXT: ret void
@@ -1008,107 +1120,129 @@ void test() {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined_wrapper
// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR8]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2, !tbaa [[TBAA30]]
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4, !tbaa [[TBAA15]]
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2, !tbaa [[TBAA27]]
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4, !tbaa [[TBAA10]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA32]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8, !tbaa [[TBAA6]]
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 1
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA32]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA6]]
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 2
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[TBAA45:![0-9]+]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]], ptr [[TMP6]], ptr [[TMP8]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8, !tbaa [[TBAA6]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16_omp_outlined_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]], ptr [[TMP6]], ptr [[TMP8]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC2ERKfS2_
// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr nonnull align 4 dereferenceable(4) [[__RE:%.*]], ptr nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA23]]
-// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA25]]
-// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA25]]
-// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK1-NEXT: [[__RE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__RE_ADDR]] to ptr
+// CHECK1-NEXT: [[__IM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__IM_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8, !tbaa [[TBAA18]]
+// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR_ASCAST]], align 8, !tbaa [[TBAA20]]
+// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR_ASCAST]], align 8, !tbaa [[TBAA20]]
+// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA25]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4, !tbaa [[TBAA19]]
-// CHECK1-NEXT: store float [[TMP1]], ptr [[__RE_]], align 4, !tbaa [[TBAA27]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR_ASCAST]], align 8, !tbaa [[TBAA20]], !nonnull [[META22]], !align [[META23]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load float, ptr [[TMP0]], align 4, !tbaa [[TBAA14]]
+// CHECK1-NEXT: store float [[TMP1]], ptr [[__RE_]], align 4, !tbaa [[TBAA24]]
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA25]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[TBAA19]]
-// CHECK1-NEXT: store float [[TMP3]], ptr [[__IM_]], align 4, !tbaa [[TBAA29]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__IM_ADDR_ASCAST]], align 8, !tbaa [[TBAA20]], !nonnull [[META22]], !align [[META23]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[TMP2]], align 4, !tbaa [[TBAA14]]
+// CHECK1-NEXT: store float [[TMP3]], ptr [[__IM_]], align 4, !tbaa [[TBAA26]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4realEv
// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA23]]
-// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK1-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8, !tbaa [[TBAA18]]
+// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[__RE_]], align 4, !tbaa [[TBAA27]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[__RE_]], align 4, !tbaa [[TBAA24]]
// CHECK1-NEXT: ret float [[TMP0]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4imagEv
// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA23]]
-// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK1-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8, !tbaa [[TBAA18]]
+// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex", ptr [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[__IM_]], align 4, !tbaa [[TBAA29]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load float, ptr [[__IM_]], align 4, !tbaa [[TBAA26]]
// CHECK1-NEXT: ret float [[TMP0]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC2ERKdS2_
// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr nonnull align 8 dereferenceable(8) [[__RE:%.*]], ptr nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR5]] comdat align 2 {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA38]]
-// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA40]]
-// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA40]]
-// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK1-NEXT: [[__RE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__RE_ADDR]] to ptr
+// CHECK1-NEXT: [[__IM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[__IM_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8, !tbaa [[TBAA31]]
+// CHECK1-NEXT: store ptr [[__RE]], ptr [[__RE_ADDR_ASCAST]], align 8, !tbaa [[TBAA33]]
+// CHECK1-NEXT: store ptr [[__IM]], ptr [[__IM_ADDR_ASCAST]], align 8, !tbaa [[TBAA33]]
+// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR]], align 8, !tbaa [[TBAA40]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8, !tbaa [[TBAA36]]
-// CHECK1-NEXT: store double [[TMP1]], ptr [[__RE_]], align 8, !tbaa [[TBAA42]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__RE_ADDR_ASCAST]], align 8, !tbaa [[TBAA33]], !nonnull [[META22]], !align [[META35]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8, !tbaa [[TBAA29]]
+// CHECK1-NEXT: store double [[TMP1]], ptr [[__RE_]], align 8, !tbaa [[TBAA36]]
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__IM_ADDR]], align 8, !tbaa [[TBAA40]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8, !tbaa [[TBAA36]]
-// CHECK1-NEXT: store double [[TMP3]], ptr [[__IM_]], align 8, !tbaa [[TBAA44]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__IM_ADDR_ASCAST]], align 8, !tbaa [[TBAA33]], !nonnull [[META22]], !align [[META35]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP2]], align 8, !tbaa [[TBAA29]]
+// CHECK1-NEXT: store double [[TMP3]], ptr [[__IM_]], align 8, !tbaa [[TBAA38]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4realEv
// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA38]]
-// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK1-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8, !tbaa [[TBAA31]]
+// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[TBAA42]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__RE_]], align 8, !tbaa [[TBAA36]]
// CHECK1-NEXT: ret double [[TMP0]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4imagEv
// CHECK1-SAME: (ptr nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8, !tbaa [[TBAA38]]
-// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK1-NEXT: [[THIS_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[THIS_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR_ASCAST]], align 8, !tbaa [[TBAA31]]
+// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds nuw %"class.std::complex.0", ptr [[THIS1]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa [[TBAA44]]
+// CHECK1-NEXT: [[TMP0:%.*]] = load double, ptr [[__IM_]], align 8, !tbaa [[TBAA38]]
// CHECK1-NEXT: ret double [[TMP0]]
//
diff --git a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp
index 9f98c18ab1dcf..3fce7abe36a6e 100644
--- a/clang/test/OpenMP/nvptx_target_simd_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_simd_codegen.cpp
@@ -61,69 +61,78 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29
// CHECK45-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-64-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-64-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK45-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK45-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-64-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
-// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK45-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK45-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-64-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK45-64-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
// CHECK45-64: simd.if.then:
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-64: omp.inner.for.cond:
-// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]]
-// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK45-64-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]]
// CHECK45-64-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-64: omp.inner.for.body:
-// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK45-64-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-64-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP24]]
-// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK45-64-NEXT: store i32 [[ADD5]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64
// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK45-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK45-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-64: omp.body.continue:
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-64: omp.inner.for.inc:
-// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK45-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
-// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK45-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
// CHECK45-64: omp.inner.for.end:
-// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-64-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP10]], 0
// CHECK45-64-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1
// CHECK45-64-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1
// CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
-// CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
+// CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[I3_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[SIMD_IF_END]]
// CHECK45-64: simd.if.end:
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
@@ -133,73 +142,82 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
// CHECK45-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-64-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-64-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK45-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-64-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META14:![0-9]+]]
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
-// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK45-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK45-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-64-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK45-64-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
// CHECK45-64: simd.if.then:
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-64: omp.inner.for.cond:
-// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
-// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK45-64-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]]
// CHECK45-64-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-64: omp.inner.for.body:
-// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK45-64-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-64-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-64-NEXT: store i32 [[ADD5]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64
// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK45-64-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-64-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32
// CHECK45-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV]], 1
// CHECK45-64-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16
-// CHECK45-64-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-64-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-64: omp.body.continue:
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-64: omp.inner.for.inc:
-// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK45-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK45-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
// CHECK45-64: omp.inner.for.end:
-// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-64-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP11]], 0
// CHECK45-64-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1
// CHECK45-64-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1
// CHECK45-64-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
-// CHECK45-64-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
+// CHECK45-64-NEXT: store i32 [[ADD12]], ptr [[I3_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[SIMD_IF_END]]
// CHECK45-64: simd.if.end:
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
@@ -209,47 +227,52 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39
// CHECK45-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-64-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-64: omp.inner.for.cond:
-// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]]
+// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10
// CHECK45-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-64: omp.inner.for.body:
-// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64
// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK45-64-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK45-64-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-64: omp.body.continue:
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-64: omp.inner.for.inc:
-// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK45-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK45-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
// CHECK45-64: omp.inner.for.end:
-// CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK45-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
//
@@ -257,54 +280,61 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44
// CHECK45-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]]) #[[ATTR0]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[N1:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[N1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-64-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-64-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-64-NEXT: [[N1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N1]] to ptr
+// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
+// CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
-// CHECK45-64-NEXT: store i32 0, ptr [[N1]], align 4
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 0, ptr [[N1_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-64: omp.inner.for.cond:
-// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]]
+// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]]
// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10
// CHECK45-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-64: omp.inner.for.body:
-// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP5]] to i64
// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK45-64-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK45-64-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-64: omp.body.continue:
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-64: omp.inner.for.inc:
-// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK45-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
+// CHECK45-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK45-64: worker.exit:
// CHECK45-64-NEXT: ret void
// CHECK45-64: omp.inner.for.end:
-// CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK45-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
+// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1_ASCAST]], align 4
// CHECK45-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK45-64-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
@@ -314,68 +344,77 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29
// CHECK45-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-32-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK45-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK45-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
-// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK45-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK45-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-32-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK45-32-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
// CHECK45-32: simd.if.then:
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32: omp.inner.for.cond:
-// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]]
-// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK45-32-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]]
// CHECK45-32-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32: omp.inner.for.body:
-// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK45-32-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP24]]
-// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK45-32-NEXT: store i32 [[ADD5]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP8]]
-// CHECK45-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK45-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32: omp.body.continue:
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32: omp.inner.for.inc:
-// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK45-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
-// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK45-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
// CHECK45-32: omp.inner.for.end:
-// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP10]], 0
// CHECK45-32-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1
// CHECK45-32-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1
// CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
-// CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
+// CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[I3_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[SIMD_IF_END]]
// CHECK45-32: simd.if.end:
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
@@ -385,72 +424,81 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
// CHECK45-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-32-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK45-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META14:![0-9]+]]
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
-// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK45-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK45-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-32-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK45-32-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
// CHECK45-32: simd.if.then:
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32: omp.inner.for.cond:
-// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
-// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK45-32-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]]
// CHECK45-32-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32: omp.inner.for.body:
-// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK45-32-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-NEXT: store i32 [[ADD5]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP8]]
-// CHECK45-32-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32
// CHECK45-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV]], 1
// CHECK45-32-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16
-// CHECK45-32-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32: omp.body.continue:
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32: omp.inner.for.inc:
-// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK45-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK45-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
// CHECK45-32: omp.inner.for.end:
-// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP11]], 0
// CHECK45-32-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1
// CHECK45-32-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1
// CHECK45-32-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
-// CHECK45-32-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
+// CHECK45-32-NEXT: store i32 [[ADD12]], ptr [[I3_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[SIMD_IF_END]]
// CHECK45-32: simd.if.end:
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
@@ -460,46 +508,51 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39
// CHECK45-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32: omp.inner.for.cond:
-// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]]
+// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10
// CHECK45-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32: omp.inner.for.body:
-// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP4]]
-// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK45-32-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK45-32-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32: omp.body.continue:
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32: omp.inner.for.inc:
-// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK45-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK45-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
// CHECK45-32: omp.inner.for.end:
-// CHECK45-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK45-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
//
@@ -507,53 +560,60 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44
// CHECK45-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]]) #[[ATTR0]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[N1:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[N1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-32-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-NEXT: [[N1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N1]] to ptr
+// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-NEXT: store i32 0, ptr [[N1]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[N1_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32: omp.inner.for.cond:
-// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]]
+// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]]
// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10
// CHECK45-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32: omp.inner.for.body:
-// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP5]]
-// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK45-32-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK45-32-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32: omp.body.continue:
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32: omp.inner.for.inc:
-// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK45-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
+// CHECK45-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK45-32: worker.exit:
// CHECK45-32-NEXT: ret void
// CHECK45-32: omp.inner.for.end:
-// CHECK45-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK45-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
+// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1_ASCAST]], align 4
// CHECK45-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK45-32-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
@@ -563,68 +623,77 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29
// CHECK45-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK45-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-EX-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
-// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK45-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK45-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
// CHECK45-32-EX: simd.if.then:
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32-EX: omp.inner.for.cond:
-// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]]
-// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK45-32-EX-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]]
// CHECK45-32-EX-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32-EX: omp.inner.for.body:
-// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK45-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP24]]
-// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD5]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP8]]
-// CHECK45-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK45-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32-EX: omp.body.continue:
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32-EX: omp.inner.for.inc:
-// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK45-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
-// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: omp.inner.for.end:
-// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP10]], 0
// CHECK45-32-EX-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1
// CHECK45-32-EX-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1
// CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[I3_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[SIMD_IF_END]]
// CHECK45-32-EX: simd.if.end:
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
@@ -634,72 +703,81 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
// CHECK45-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK45-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-EX-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META14:![0-9]+]]
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
-// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK45-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK45-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
// CHECK45-32-EX: simd.if.then:
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32-EX: omp.inner.for.cond:
-// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
-// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK45-32-EX-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]]
// CHECK45-32-EX-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32-EX: omp.inner.for.body:
-// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK45-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD5]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP8]]
-// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32
// CHECK45-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV]], 1
// CHECK45-32-EX-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16
-// CHECK45-32-EX-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-EX-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32-EX: omp.body.continue:
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32-EX: omp.inner.for.inc:
-// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK45-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: omp.inner.for.end:
-// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP11]], 0
// CHECK45-32-EX-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1
// CHECK45-32-EX-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1
// CHECK45-32-EX-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[ADD12]], ptr [[I3_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[SIMD_IF_END]]
// CHECK45-32-EX: simd.if.end:
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
@@ -709,46 +787,51 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39
// CHECK45-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32-EX: omp.inner.for.cond:
-// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]]
+// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10
// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32-EX: omp.inner.for.body:
-// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP4]]
-// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK45-32-EX-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32-EX: omp.body.continue:
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32-EX: omp.inner.for.inc:
-// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK45-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: omp.inner.for.end:
-// CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK45-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
//
@@ -756,53 +839,60 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44
// CHECK45-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]]) #[[ATTR0]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[N1:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[N1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-EX-NEXT: [[N1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N1]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[N1]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[N1_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32-EX: omp.inner.for.cond:
-// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]]
+// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]]
// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10
// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32-EX: omp.inner.for.body:
-// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP5]]
-// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK45-32-EX-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32-EX: omp.body.continue:
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32-EX: omp.inner.for.inc:
-// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK45-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK45-32-EX: worker.exit:
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: omp.inner.for.end:
-// CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK45-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
+// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK45-32-EX-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
@@ -812,69 +902,78 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-64-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
// CHECK-64: simd.if.then:
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]]
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]]
// CHECK-64-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP24]]
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
// CHECK-64: omp.inner.for.end:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-64-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP10]], 0
// CHECK-64-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1
// CHECK-64-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1
// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
-// CHECK-64-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
+// CHECK-64-NEXT: store i32 [[ADD10]], ptr [[I3_ASCAST]], align 4
// CHECK-64-NEXT: br label [[SIMD_IF_END]]
// CHECK-64: simd.if.end:
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
@@ -884,73 +983,82 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-64-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META14:![0-9]+]]
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
// CHECK-64: simd.if.then:
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]]
// CHECK-64-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32
// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV]], 1
// CHECK-64-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16
-// CHECK-64-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK-64-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
// CHECK-64: omp.inner.for.end:
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-64-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP11]], 0
// CHECK-64-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1
// CHECK-64-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1
// CHECK-64-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
-// CHECK-64-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
+// CHECK-64-NEXT: store i32 [[ADD12]], ptr [[I3_ASCAST]], align 4
// CHECK-64-NEXT: br label [[SIMD_IF_END]]
// CHECK-64: simd.if.end:
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
@@ -960,47 +1068,52 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]]
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-64-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK-64-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
// CHECK-64: omp.inner.for.end:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
//
@@ -1008,54 +1121,61 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44
// CHECK-64-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[N1:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[N1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-64-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[N1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N1]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
+// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[N1]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[N1_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]]
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]]
// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP5]] to i64
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK-64-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK-64: worker.exit:
// CHECK-64-NEXT: ret void
// CHECK-64: omp.inner.for.end:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
@@ -1065,68 +1185,77 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-32-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
// CHECK-32: simd.if.then:
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]]
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]]
// CHECK-32-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP24]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP8]]
-// CHECK-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
// CHECK-32: omp.inner.for.end:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP10]], 0
// CHECK-32-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1
// CHECK-32-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1
// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
-// CHECK-32-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
+// CHECK-32-NEXT: store i32 [[ADD10]], ptr [[I3_ASCAST]], align 4
// CHECK-32-NEXT: br label [[SIMD_IF_END]]
// CHECK-32: simd.if.end:
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
@@ -1136,72 +1265,81 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-32-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META14:![0-9]+]]
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
// CHECK-32: simd.if.then:
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]]
// CHECK-32-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP8]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32
// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV]], 1
// CHECK-32-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16
-// CHECK-32-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
// CHECK-32: omp.inner.for.end:
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP11]], 0
// CHECK-32-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1
// CHECK-32-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1
// CHECK-32-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
-// CHECK-32-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
+// CHECK-32-NEXT: store i32 [[ADD12]], ptr [[I3_ASCAST]], align 4
// CHECK-32-NEXT: br label [[SIMD_IF_END]]
// CHECK-32: simd.if.end:
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
@@ -1211,46 +1349,51 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]]
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP4]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK-32-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
// CHECK-32: omp.inner.for.end:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
//
@@ -1258,53 +1401,60 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44
// CHECK-32-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[N1:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[N1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[N1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N1]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[N1]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[N1_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]]
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]]
// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP5]]
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK-32-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK-32: worker.exit:
// CHECK-32-NEXT: ret void
// CHECK-32: omp.inner.for.end:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
@@ -1314,68 +1464,77 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
// CHECK-32-EX: simd.if.then:
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]]
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP24]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP8]]
-// CHECK-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP24]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: omp.inner.for.end:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-EX-NEXT: [[SUB7:%.*]] = sub nsw i32 [[TMP10]], 0
// CHECK-32-EX-NEXT: [[DIV8:%.*]] = sdiv i32 [[SUB7]], 1
// CHECK-32-EX-NEXT: [[MUL9:%.*]] = mul nsw i32 [[DIV8]], 1
// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
-// CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[I3]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[I3_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[SIMD_IF_END]]
// CHECK-32-EX: simd.if.end:
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
@@ -1385,72 +1544,81 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META14:![0-9]+]]
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]]
// CHECK-32-EX: simd.if.then:
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp slt i32 [[TMP5]], [[ADD]]
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP8]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32
// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV]], 1
// CHECK-32-EX-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16
-// CHECK-32-EX-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-EX-NEXT: store i16 [[CONV7]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: omp.inner.for.end:
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-EX-NEXT: [[SUB9:%.*]] = sub nsw i32 [[TMP11]], 0
// CHECK-32-EX-NEXT: [[DIV10:%.*]] = sdiv i32 [[SUB9]], 1
// CHECK-32-EX-NEXT: [[MUL11:%.*]] = mul nsw i32 [[DIV10]], 1
// CHECK-32-EX-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL11]]
-// CHECK-32-EX-NEXT: store i32 [[ADD12]], ptr [[I3]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD12]], ptr [[I3_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[SIMD_IF_END]]
// CHECK-32-EX: simd.if.end:
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
@@ -1460,46 +1628,51 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP4]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-EX-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK-32-EX-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: omp.inner.for.end:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
//
@@ -1507,53 +1680,60 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44
// CHECK-32-EX-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[N:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[N1:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[N]], ptr [[N_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[N1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[N1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N1]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[N_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP2]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[N1]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[N1_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]]
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP3]], 10
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP5]]
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-EX-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK-32-EX-NEXT: store i32 [[ADD2]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK-32-EX: worker.exit:
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: omp.inner.for.end:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[N1_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[TMP1]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
index 5d6efc1b126a5..acda9cc7a2cab 100644
--- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp
@@ -52,24 +52,29 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR]], align 1
-// CHECK1-NEXT: store i8 [[TMP2]], ptr [[A_CASTED]], align 1
-// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR_ASCAST]], align 1
+// CHECK1-NEXT: store i8 [[TMP2]], ptr [[A_CASTED_ASCAST]], align 1
+// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[A_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP3]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -79,37 +84,45 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i8 49, ptr [[A_ADDR]], align 1
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i8 49, ptr [[A_ADDR_ASCAST]], align 1
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
-// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP3]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -119,37 +132,45 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: store i16 1, ptr [[AA_ADDR]], align 2
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i16 1, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
-// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR2]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK1-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[AA_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP3]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -159,50 +180,61 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[AA:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[AA_ADDR_ASCAST]], ptr [[TMP0]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META8]]
// CHECK1-NEXT: store i16 1, ptr [[TMP0]], align 2
// CHECK1-NEXT: ret void
//
@@ -210,24 +242,29 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR]], align 1
-// CHECK2-NEXT: store i8 [[TMP2]], ptr [[A_CASTED]], align 1
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]]
+// CHECK2-NEXT: [[TMP2:%.*]] = load i8, ptr [[A_ADDR_ASCAST]], align 1
+// CHECK2-NEXT: store i8 [[TMP2]], ptr [[A_CASTED_ASCAST]], align 1
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[A_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -237,37 +274,45 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store i8 49, ptr [[A_ADDR]], align 1
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i8 49, ptr [[A_ADDR_ASCAST]], align 1
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK2-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
+// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK2-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP3]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -277,37 +322,45 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l28_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: store i16 1, ptr [[AA_ADDR]], align 2
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i16 1, ptr [[AA_ADDR_ASCAST]], align 2
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR]], align 2
-// CHECK2-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED]], align 2
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR2]]
+// CHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[AA_ADDR_ASCAST]], align 2
+// CHECK2-NEXT: store i16 [[TMP2]], ptr [[AA_CASTED_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[AA_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP3]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -317,50 +370,61 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[AA:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[AA_ADDR]], ptr [[TMP0]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[AA_ADDR_ASCAST]], ptr [[TMP0]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 1)
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
+// CHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 1)
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[AA:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META8]]
// CHECK2-NEXT: store i16 1, ptr [[TMP0]], align 2
// CHECK2-NEXT: ret void
//
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
index 48ac5a799346c..0e77e4c7eddcf 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp
@@ -35,18 +35,21 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -56,59 +59,68 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[I:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[I]], ptr [[TMP8]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -121,13 +133,16 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4
@@ -137,36 +152,43 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined_wrapper
// CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK1-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i64 0
// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR3]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]]) #[[ATTR3]]
// CHECK1-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -176,59 +198,68 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[I:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP1]], i32 92, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9
// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]]
// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[I]], ptr [[TMP8]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 1)
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1
-// CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -241,13 +272,16 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 4, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP0]], align 4
@@ -257,17 +291,21 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined_wrapper
// CHECK2-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR]], align 2
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
-// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 4
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[GLOBAL_ARGS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GLOBAL_ARGS]] to ptr
+// CHECK2-NEXT: store i16 [[TMP0]], ptr [[DOTADDR_ASCAST]], align 2
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS_ASCAST]])
+// CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS_ASCAST]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP4]]) #[[ATTR3]]
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_omp_outlined_omp_outlined(ptr [[DOTADDR1_ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP4]]) #[[ATTR3]]
// CHECK2-NEXT: ret void
//
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
index 8397b93cbeedb..759cd0105d752 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
@@ -74,33 +74,41 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META10:![0-9]+]], !align [[META11:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[L_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -110,142 +118,160 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 128)
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_CASTED]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[L_CASTED]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[L_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to ptr
// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to ptr
// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to ptr
// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
// CHECK1-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP21]] to ptr
// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8
-// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 5)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]]
// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK1: cond.true10:
-// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END12:%.*]]
// CHECK1: cond.false11:
-// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END12]]
// CHECK1: cond.end12:
// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ]
-// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP45]])
-// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
// CHECK1-NEXT: br i1 [[TMP47]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK1: .omp.lastprivate.then:
-// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP48]], ptr [[L_ADDR]], align 4
+// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP48]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK1: .omp.lastprivate.done:
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -256,128 +282,145 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32)
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 32)
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK1: omp.dispatch.cond:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP10]] to i32
// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[CONV5]]
// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP11]] to i32
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK1: omp.dispatch.body:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK1-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4
-// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1
-// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK1: omp.dispatch.inc:
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
-// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP27]])
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK1: .omp.lastprivate.then:
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP30]], ptr [[L_ADDR]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP30]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK1: .omp.lastprivate.done:
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -388,27 +431,33 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META12:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -418,126 +467,142 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META12]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr
// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr
// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr
// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]]
// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK1: cond.true10:
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END12:%.*]]
// CHECK1: cond.false11:
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END12]]
// CHECK1: cond.end12:
// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ]
-// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -548,70 +613,86 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META12]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]]
// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4]], align 4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
@@ -623,15 +704,15 @@ int bar(int n){
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
-// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -642,21 +723,25 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -666,88 +751,99 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr
// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr
// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9
// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK1: cond.true5:
// CHECK1-NEXT: br label [[COND_END7:%.*]]
// CHECK1: cond.false6:
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END7]]
// CHECK1: cond.end7:
// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ]
-// CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -759,52 +855,64 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
@@ -814,10 +922,10 @@ int bar(int n){
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -829,27 +937,33 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK1-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -859,100 +973,116 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK1-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[F_CASTED]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr
// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr
// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK1-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr
// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
-// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99
// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]]
// CHECK1: cond.true6:
// CHECK1-NEXT: br label [[COND_END8:%.*]]
// CHECK1: cond.false7:
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END8]]
// CHECK1: cond.end8:
// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ]
-// CHECK1-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -964,77 +1094,93 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP10]], 10
// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL5]]
// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
-// CHECK1-NEXT: store i32 10, ptr [[K]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J_ASCAST]], align 4
+// CHECK1-NEXT: store i32 10, ptr [[K_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], [[MUL8]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[K_ASCAST]], align 4
// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP14]]
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64
// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
// CHECK1-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4
@@ -1042,10 +1188,10 @@ int bar(int n){
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -1057,27 +1203,33 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -1087,142 +1239,162 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I9:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I9:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I9]] to ptr
+// CHECK1-NEXT: [[J10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J10]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0
// CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: land.lhs.true:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
// CHECK1-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]])
-// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 [[CONV11]])
+// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK1-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]]
// CHECK1-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK1-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1
// CHECK1-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]]
// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP19]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP17]] to ptr
// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr
// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr
// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP30]], [[TMP31]]
-// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP32]], [[TMP33]]
-// CHECK1-NEXT: store i64 [[ADD15]], ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK1-NEXT: store i64 [[ADD15]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP34]], [[TMP35]]
-// CHECK1-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK1-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK1-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP36]], [[TMP37]]
// CHECK1-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]]
// CHECK1: cond.true18:
-// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK1-NEXT: br label [[COND_END20:%.*]]
// CHECK1: cond.false19:
-// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
// CHECK1-NEXT: br label [[COND_END20]]
// CHECK1: cond.end20:
// CHECK1-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP38]], [[COND_TRUE18]] ], [ [[TMP39]], [[COND_FALSE19]] ]
-// CHECK1-NEXT: store i64 [[COND21]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK1-NEXT: store i64 [[TMP40]], ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: store i64 [[COND21]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[TMP40]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP42]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -1233,81 +1405,101 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I9:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I9:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I9]] to ptr
+// CHECK1-NEXT: [[J10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J10]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0
// CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: land.lhs.true:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_UB]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_LB]], align 8
-// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8
-// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_LB_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB_ASCAST]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
-// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
-// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 1)
+// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CMP11:%.*]] = icmp ule i64 [[TMP13]], [[TMP14]]
// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP16]], 0
// CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]]
@@ -1316,16 +1508,16 @@ int bar(int n){
// CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]]
// CHECK1-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32
-// CHECK1-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: store i32 [[CONV18]], ptr [[I9_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP19]], 0
// CHECK1-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1
// CHECK1-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]]
// CHECK1-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64
// CHECK1-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP18]], [[CONV22]]
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP20]], 0
// CHECK1-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
// CHECK1-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]]
@@ -1335,14 +1527,14 @@ int bar(int n){
// CHECK1-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1
// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]]
// CHECK1-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32
-// CHECK1-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I9]], align 4
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4
+// CHECK1-NEXT: store i32 [[CONV32]], ptr [[J10_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I9_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10_ASCAST]], align 4
// CHECK1-NEXT: [[ADD33:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I9]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I9_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP24]] to i64
// CHECK1-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM34]]
// CHECK1-NEXT: store i32 [[ADD33]], ptr [[ARRAYIDX35]], align 4
@@ -1350,15 +1542,15 @@ int bar(int n){
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP25]], [[TMP26]]
-// CHECK1-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP28]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -1369,30 +1561,37 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -1402,131 +1601,148 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[V_ADDR]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to ptr
// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to ptr
// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to ptr
// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 5)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]]
// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK1: cond.true10:
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END12:%.*]]
// CHECK1: cond.false11:
-// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END12]]
// CHECK1: cond.end12:
// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE10]] ], [ [[TMP40]], [[COND_FALSE11]] ]
-// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -1537,77 +1753,94 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]]
// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4]], align 4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[IDXPROM]]
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP16]] to i64
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM7]]
// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX8]], align 4
@@ -1615,15 +1848,15 @@ int bar(int n){
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -1634,33 +1867,41 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META10:![0-9]+]], !align [[META11:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[L_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1670,142 +1911,160 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 128)
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP20]], ptr [[L_CASTED]], align 4
-// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[L_CASTED]], align 8
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP20]], ptr [[L_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i64, ptr [[L_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to ptr
// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8
-// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to ptr
// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
-// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK2-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to ptr
// CHECK2-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8
-// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP28]], align 8
-// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
+// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
// CHECK2-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP21]] to ptr
// CHECK2-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8
-// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 5)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]]
// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK2: cond.true10:
-// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END12:%.*]]
// CHECK2: cond.false11:
-// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END12]]
// CHECK2: cond.end12:
// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ]
-// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP45]])
-// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
// CHECK2-NEXT: br i1 [[TMP47]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK2: .omp.lastprivate.then:
-// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP48]], ptr [[L_ADDR]], align 4
+// CHECK2-NEXT: [[TMP48:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP48]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK2: .omp.lastprivate.done:
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -1816,128 +2075,145 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[L]], ptr [[L_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32)
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 32)
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK2: omp.dispatch.cond:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP10]] to i32
// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[CONV5]]
// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP11]] to i32
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK2: omp.dispatch.body:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
// CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK2-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4
-// CHECK2-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP20]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1
-// CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK2: omp.dispatch.inc:
-// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
-// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4
+// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK2: omp.dispatch.end:
-// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP27]])
-// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK2: .omp.lastprivate.then:
-// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP30]], ptr [[L_ADDR]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP30]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK2: .omp.lastprivate.done:
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -1948,27 +2224,33 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META12:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1978,126 +2260,142 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META12]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr
// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr
// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8
-// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr
// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
-// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8
-// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
-// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]]
// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK2: cond.true10:
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END12:%.*]]
// CHECK2: cond.false11:
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END12]]
// CHECK2: cond.end12:
// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ]
-// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -2108,70 +2406,86 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META12]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64
-// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]]
// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
@@ -2183,15 +2497,15 @@ int bar(int n){
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
-// CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -2202,21 +2516,25 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -2226,88 +2544,99 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10
// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr
// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr
// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8
-// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9
// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK2: cond.true5:
// CHECK2-NEXT: br label [[COND_END7:%.*]]
// CHECK2: cond.false6:
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END7]]
// CHECK2: cond.end7:
// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ]
-// CHECK2-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -2319,52 +2648,64 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
@@ -2374,10 +2715,10 @@ int bar(int n){
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -2389,27 +2730,33 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK2-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -2419,100 +2766,116 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK2-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100
// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[F_CASTED]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED]], align 8
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr
// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8
-// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK2-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr
// CHECK2-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8
-// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8
-// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr
// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
-// CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99
// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]]
// CHECK2: cond.true6:
// CHECK2-NEXT: br label [[COND_END8:%.*]]
// CHECK2: cond.false7:
-// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END8]]
// CHECK2: cond.end8:
// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ]
-// CHECK2-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -2524,77 +2887,93 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP10]], 10
// CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL5]]
// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
-// CHECK2-NEXT: store i32 10, ptr [[K]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[J_ASCAST]], align 4
+// CHECK2-NEXT: store i32 10, ptr [[K_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]]
// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], [[MUL8]]
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[K_ASCAST]], align 4
// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP14]]
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64
// CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
// CHECK2-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4
@@ -2602,10 +2981,10 @@ int bar(int n){
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -2617,27 +2996,33 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -2647,141 +3032,161 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I8:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[J9:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I8:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[J9:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I8]] to ptr
+// CHECK2-NEXT: [[J9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J9]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0
// CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]]
// CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1
-// CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
// CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: land.lhs.true:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1
// CHECK2-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP15]], [[ADD]]
// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP21]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr
// CHECK2-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8
-// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr
// CHECK2-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
-// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK2-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP22]] to ptr
// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8
-// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP29]], align 8
-// CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]]
-// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]]
-// CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP36]], [[TMP37]]
-// CHECK2-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK2-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
// CHECK2-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]]
// CHECK2-NEXT: br i1 [[CMP15]], label [[COND_TRUE16:%.*]], label [[COND_FALSE17:%.*]]
// CHECK2: cond.true16:
-// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END18:%.*]]
// CHECK2: cond.false17:
-// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END18]]
// CHECK2: cond.end18:
// CHECK2-NEXT: [[COND19:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE16]] ], [ [[TMP41]], [[COND_FALSE17]] ]
-// CHECK2-NEXT: store i32 [[COND19]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND19]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -2792,97 +3197,117 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I9:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[J10:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I9:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[J10:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I9]] to ptr
+// CHECK2-NEXT: [[J10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J10]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0
// CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]]
// CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1
-// CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
// CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: land.lhs.true:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
-// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP9]] to i32
-// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[CONV8]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[CONV8]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[CONV11:%.*]] = sext i32 [[TMP13]] to i64
-// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CMP12:%.*]] = icmp ule i64 [[CONV11]], [[TMP14]]
// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP16]], 0
// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1
// CHECK2-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]]
// CHECK2-NEXT: [[DIV16:%.*]] = sdiv i32 [[TMP15]], [[MUL15]]
// CHECK2-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL17]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I9]], align 4
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I9_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP19]], 0
// CHECK2-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1
// CHECK2-NEXT: [[MUL20:%.*]] = mul nsw i32 1, [[DIV19]]
// CHECK2-NEXT: [[DIV21:%.*]] = sdiv i32 [[TMP18]], [[MUL20]]
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP20]], 0
// CHECK2-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
// CHECK2-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
@@ -2890,14 +3315,14 @@ int bar(int n){
// CHECK2-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP17]], [[MUL25]]
// CHECK2-NEXT: [[MUL27:%.*]] = mul nsw i32 [[SUB26]], 1
// CHECK2-NEXT: [[ADD28:%.*]] = add nsw i32 0, [[MUL27]]
-// CHECK2-NEXT: store i32 [[ADD28]], ptr [[J10]], align 4
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I9]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4
+// CHECK2-NEXT: store i32 [[ADD28]], ptr [[J10_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I9_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10_ASCAST]], align 4
// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I9]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I9_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM30:%.*]] = sext i32 [[TMP24]] to i64
// CHECK2-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM30]]
// CHECK2-NEXT: store i32 [[ADD29]], ptr [[ARRAYIDX31]], align 4
@@ -2905,15 +3330,15 @@ int bar(int n){
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP25]], [[TMP26]]
-// CHECK2-NEXT: store i32 [[ADD32]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD32]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP28]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -2924,30 +3349,37 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -2957,131 +3389,148 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[V_ADDR]], align 8
-// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to ptr
// CHECK2-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8
-// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to ptr
// CHECK2-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8
-// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to ptr
// CHECK2-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
-// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8
-// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
+// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
// CHECK2-NEXT: store ptr [[TMP20]], ptr [[TMP28]], align 8
-// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 5)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]]
// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK2: cond.true10:
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END12:%.*]]
// CHECK2: cond.false11:
-// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END12]]
// CHECK2: cond.end12:
// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE10]] ], [ [[TMP40]], [[COND_FALSE11]] ]
-// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -3092,77 +3541,94 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META10]], !align [[META11]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64
-// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]]
// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR]], align 8
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[IDXPROM]]
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP16]] to i64
// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM7]]
// CHECK2-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX8]], align 4
@@ -3170,15 +3636,15 @@ int bar(int n){
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -3189,33 +3655,41 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK3-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK3-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK3-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META10:![0-9]+]], !align [[META11:![0-9]+]]
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[L_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -3225,140 +3699,158 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK3-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK3-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK3-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META11]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 128)
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP18]], ptr [[L_CASTED]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[L_CASTED]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP18]], ptr [[L_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[L_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to ptr
// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to ptr
// CHECK3-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4
-// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK3-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to ptr
// CHECK3-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4
-// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
+// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 3
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 4
-// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4
+// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 4
// CHECK3-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP19]] to ptr
// CHECK3-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4
-// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 5)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]]
// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK3: cond.true10:
-// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END12:%.*]]
// CHECK3: cond.false11:
-// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END12]]
// CHECK3: cond.end12:
// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE10]] ], [ [[TMP40]], [[COND_FALSE11]] ]
-// CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]])
-// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK3-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
// CHECK3-NEXT: br i1 [[TMP45]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK3: .omp.lastprivate.then:
-// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP46]], ptr [[L_ADDR]], align 4
+// CHECK3-NEXT: [[TMP46:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP46]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK3: .omp.lastprivate.done:
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
@@ -3369,123 +3861,140 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK3-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[L]], ptr [[L_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META11]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32)
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 32)
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK3: omp.dispatch.cond:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK3: omp.dispatch.body:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]]
// CHECK3-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3]], align 4
-// CHECK3-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP20]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1
-// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK3: omp.dispatch.inc:
-// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
-// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
+// CHECK3-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK3: omp.dispatch.end:
-// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP27]])
-// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK3: .omp.lastprivate.then:
-// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP30]], ptr [[L_ADDR]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP30]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK3: .omp.lastprivate.done:
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
@@ -3496,27 +4005,33 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK3-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META12:![0-9]+]]
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -3526,124 +4041,140 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK3-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META12]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to ptr
// CHECK3-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to ptr
// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to ptr
// CHECK3-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4
-// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
+// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 3
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4
-// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 4)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
-// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
-// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]]
// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK3: cond.true10:
-// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END12:%.*]]
// CHECK3: cond.false11:
-// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END12]]
// CHECK3: cond.end12:
// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE10]] ], [ [[TMP36]], [[COND_FALSE11]] ]
-// CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4
// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP39]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
@@ -3654,67 +4185,83 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META12]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]]
// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I3]], align 4
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I3_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP13]]
// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32
@@ -3725,15 +4272,15 @@ int bar(int n){
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
-// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
@@ -3744,21 +4291,25 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META11]]
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -3768,86 +4319,97 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META11]]
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10
// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to ptr
// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP13]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 3)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK3: cond.true5:
// CHECK3-NEXT: br label [[COND_END7:%.*]]
// CHECK3: cond.false6:
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END7]]
// CHECK3: cond.end7:
// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -3859,49 +4421,61 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39_omp_outlined_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK3-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META11]]
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]]
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1
@@ -3910,10 +4484,10 @@ int bar(int n){
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -3925,27 +4499,33 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK3-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK3-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META11]]
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP3]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -3955,98 +4535,114 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK3-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK3-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META11]]
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP9]], ptr [[F_CASTED]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[F_CASTED]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP9]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[F_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to ptr
// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 3
// CHECK3-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP10]] to ptr
// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 4)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], 99
// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]]
// CHECK3: cond.true6:
// CHECK3-NEXT: br label [[COND_END8:%.*]]
// CHECK3: cond.false7:
-// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END8]]
// CHECK3: cond.end8:
// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP25]], [[COND_FALSE7]] ]
-// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -4058,83 +4654,99 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44_omp_outlined_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK3-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK3-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK3-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META11]]
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10
// CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]]
// CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
-// CHECK3-NEXT: store i32 [[ADD5]], ptr [[J]], align 4
-// CHECK3-NEXT: store i32 10, ptr [[K]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[ADD5]], ptr [[J_ASCAST]], align 4
+// CHECK3-NEXT: store i32 10, ptr [[K_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]]
// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]]
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[K_ASCAST]], align 4
// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]]
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP15]]
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[J_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP16]]
// CHECK3-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX9]], align 4
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -4146,27 +4758,33 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK3-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META11]]
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -4176,144 +4794,164 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I9:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[J10:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I9:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[J10:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I9]] to ptr
+// CHECK3-NEXT: [[J10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J10]] to ptr
+// CHECK3-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META11]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0
// CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK3-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK3-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: land.lhs.true:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK3-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
// CHECK3-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64
-// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK3-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]])
-// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK3-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 [[CONV11]])
+// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK3-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]]
// CHECK3-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
-// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK3-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8
+// CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1
// CHECK3-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]]
// CHECK3-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
// CHECK3-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32
-// CHECK3-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// CHECK3-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
// CHECK3-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP21]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP18]] to ptr
// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4
-// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP20]] to ptr
// CHECK3-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 4
-// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK3-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP22]] to ptr
// CHECK3-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4
-// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
+// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 3
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP29]], align 4
-// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 4)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK3-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP32]], [[TMP33]]
-// CHECK3-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK3-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK3-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP34]], [[TMP35]]
-// CHECK3-NEXT: store i64 [[ADD15]], ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK3-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK3-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK3-NEXT: store i64 [[ADD15]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP36]], [[TMP37]]
-// CHECK3-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK3-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK3-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK3-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK3-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP38]], [[TMP39]]
// CHECK3-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]]
// CHECK3: cond.true18:
-// CHECK3-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK3-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK3-NEXT: br label [[COND_END20:%.*]]
// CHECK3: cond.false19:
-// CHECK3-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// CHECK3-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
// CHECK3-NEXT: br label [[COND_END20]]
// CHECK3: cond.end20:
// CHECK3-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP40]], [[COND_TRUE18]] ], [ [[TMP41]], [[COND_FALSE19]] ]
-// CHECK3-NEXT: store i64 [[COND21]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK3-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK3-NEXT: store i64 [[TMP42]], ptr [[DOTOMP_IV]], align 8
+// CHECK3-NEXT: store i64 [[COND21]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[TMP42]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
@@ -4324,84 +4962,104 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52_omp_outlined_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I11:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[J12:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK3-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr
+// CHECK3-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META11]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0
// CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK3-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK3-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: land.lhs.true:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK3-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_UB]], align 8
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
+// CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_UB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[CONV9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[CONV10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK3-NEXT: store i64 [[CONV9]], ptr [[DOTOMP_LB]], align 8
-// CHECK3-NEXT: store i64 [[CONV10]], ptr [[DOTOMP_UB]], align 8
-// CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: store i64 [[CONV9]], ptr [[DOTOMP_LB_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[CONV10]], ptr [[DOTOMP_UB_ASCAST]], align 8
+// CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
-// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
-// CHECK3-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8
+// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 1)
+// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[CONV13:%.*]] = zext i32 [[TMP14]] to i64
// CHECK3-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP13]], [[CONV13]]
// CHECK3-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK3-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP16]], 0
// CHECK3-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
// CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
@@ -4410,16 +5068,16 @@ int bar(int n){
// CHECK3-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
// CHECK3-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
-// CHECK3-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: store i32 [[CONV21]], ptr [[I11_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK3-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP19]], 0
// CHECK3-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
// CHECK3-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
// CHECK3-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
// CHECK3-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP18]], [[CONV25]]
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK3-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP20]], 0
// CHECK3-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
// CHECK3-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
@@ -4429,28 +5087,28 @@ int bar(int n){
// CHECK3-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
// CHECK3-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
// CHECK3-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
-// CHECK3-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I11]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[J12]], align 4
+// CHECK3-NEXT: store i32 [[CONV35]], ptr [[J12_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I11_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[J12_ASCAST]], align 4
// CHECK3-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
-// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I11]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I11_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP23]]
-// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[J12]], align 4
+// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[J12_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP24]]
// CHECK3-NEXT: store i32 [[ADD36]], ptr [[ARRAYIDX37]], align 4
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK3-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP25]], [[TMP26]]
-// CHECK3-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8
+// CHECK3-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP28]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
@@ -4461,30 +5119,37 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK3-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK3-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META11]]
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -4494,129 +5159,146 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK3-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK3-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META11]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[V_ADDR]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP14]] to ptr
// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP15]] to ptr
// CHECK3-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 4
-// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK3-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP17]] to ptr
// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4
-// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
+// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 3
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP25]], align 4
-// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4
+// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 4
// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP26]], align 4
-// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 5)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
-// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]]
// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK3: cond.true10:
-// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END12:%.*]]
// CHECK3: cond.false11:
-// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END12]]
// CHECK3: cond.end12:
// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ]
-// CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
@@ -4627,88 +5309,105 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59_omp_outlined_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK3-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META10]], !align [[META11]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]]
// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I3]], align 4
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I3_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 [[TMP14]]
// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]]
// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX5]], align 4
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
index e687a537ecf1e..6a0a33f912711 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
@@ -32,33 +32,41 @@ int main(int argc, char **argv) {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK1-NEXT: [[ARGC_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -68,135 +76,153 @@ int main(int argc, char **argv) {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK1-NEXT: [[ARGC_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK1-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARGC_CASTED]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[ARGC_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[ARGC_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to ptr
// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to ptr
// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to ptr
// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
// CHECK1-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP21]] to ptr
// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8
-// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 5)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
-// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]]
// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]]
// CHECK1: cond.true11:
-// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END13:%.*]]
// CHECK1: cond.false12:
-// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END13]]
// CHECK1: cond.end13:
// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ]
-// CHECK1-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP45]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -207,121 +233,138 @@ int main(int argc, char **argv) {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I5]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[CONV4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]])
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[TMP7]])
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK1: omp.dispatch.cond:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[TMP11]] to i32
// CHECK1-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[TMP10]], [[CONV6]]
// CHECK1-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP12]] to i32
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV8]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK1: omp.dispatch.body:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
// CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I5]], align 4
-// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I5]]) #[[ATTR5:[0-9]+]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I5_ASCAST]], align 4
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I5_ASCAST]]) #[[ATTR5:[0-9]+]]
// CHECK1-NEXT: [[CALL11:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]]
// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[CALL]], [[CALL11]]
-// CHECK1-NEXT: [[CALL13:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]]
+// CHECK1-NEXT: [[CALL13:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR_ASCAST]]) #[[ATTR5]]
// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[ADD12]], [[CALL13]]
// CHECK1-NEXT: store i32 [[ADD14]], ptr [[TMP0]], align 4
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP20]], 1
-// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK1: omp.dispatch.inc:
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
-// CHECK1-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
-// CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP26]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -332,33 +375,41 @@ int main(int argc, char **argv) {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK2-NEXT: [[ARGC_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -368,133 +419,151 @@ int main(int argc, char **argv) {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK2-NEXT: [[ARGC_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP16]], ptr [[ARGC_CASTED]], align 4
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP16]], ptr [[ARGC_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARGC_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to ptr
// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to ptr
// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to ptr
// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4
-// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
+// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 3
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 4
-// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4
+// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 4
// CHECK2-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP19]] to ptr
// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4
-// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 5)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]]
// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]]
// CHECK2: cond.true11:
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END13:%.*]]
// CHECK2: cond.false12:
-// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END13]]
// CHECK2: cond.end13:
// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ]
-// CHECK2-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND14]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -505,117 +574,134 @@ int main(int argc, char **argv) {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]])
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[TMP7]])
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK2: omp.dispatch.cond:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]]
// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK2: omp.dispatch.body:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]]
// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR5:[0-9]+]]
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4
+// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4_ASCAST]]) #[[ATTR5:[0-9]+]]
// CHECK2-NEXT: [[CALL8:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR5]]
// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[CALL]], [[CALL8]]
-// CHECK2-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR5]]
+// CHECK2-NEXT: [[CALL10:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR_ASCAST]]) #[[ATTR5]]
// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD9]], [[CALL10]]
// CHECK2-NEXT: store i32 [[ADD11]], ptr [[TMP0]], align 4
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1
-// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK2: omp.dispatch.inc:
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
-// CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP23]], [[TMP24]]
-// CHECK2-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4
+// CHECK2-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK2: omp.dispatch.end:
-// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP26]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
index 9aee4bd09282a..6ffae825e196b 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp
@@ -62,33 +62,41 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[L_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -98,154 +106,172 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 128)
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_CASTED]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[L_CASTED]], align 8, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_CASTED_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i64, ptr [[L_CASTED_ASCAST]], align 8, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to ptr
-// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to ptr
-// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to ptr
-// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
+// CHECK1-NEXT: store ptr [[TMP27]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP28]], align 8, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
// CHECK1-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP21]] to ptr
-// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5), !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: store ptr [[TMP30]], ptr [[TMP29]], align 8, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[TMP31]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 5), !llvm.access.group [[ACC_GRP10]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]]
// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK1: cond.true10:
-// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK1-NEXT: br label [[COND_END12:%.*]]
// CHECK1: cond.false11:
-// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK1-NEXT: br label [[COND_END12]]
// CHECK1: cond.end12:
// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE10]] ], [ [[TMP42]], [[COND_FALSE11]] ]
-// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: store i32 [[TMP43]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP44:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP45:%.*]] = load i32, ptr [[TMP44]], align 4
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP45]])
-// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0
// CHECK1-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
-// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP48:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP48]], 0
// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1
// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD16]], ptr [[I3]], align 4
+// CHECK1-NEXT: store i32 [[ADD16]], ptr [[I3_ASCAST]], align 4
// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK1: .omp.final.done:
-// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
// CHECK1-NEXT: br i1 [[TMP50]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK1: .omp.lastprivate.then:
-// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP51]], ptr [[L_ADDR]], align 4
+// CHECK1-NEXT: [[TMP51:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP51]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK1: .omp.lastprivate.done:
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -256,140 +282,157 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 noundef [[L:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[L]], ptr [[L_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32)
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 32)
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK1: omp.dispatch.cond:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP10]] to i32
// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[CONV5]]
// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV7:%.*]] = trunc i64 [[TMP11]] to i32
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[CONV7]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK1: omp.dispatch.body:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I4_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK1-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I4_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK1-NEXT: store i32 [[TMP20]], ptr [[L_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], 1
-// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK1: omp.dispatch.inc:
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
-// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP27]])
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP30]], 0
// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1
// CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 [[DIV14]], 1
// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL15]]
-// CHECK1-NEXT: store i32 [[ADD16]], ptr [[I4]], align 4
+// CHECK1-NEXT: store i32 [[ADD16]], ptr [[I4_ASCAST]], align 4
// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK1: .omp.final.done:
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK1: .omp.lastprivate.then:
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP33]], ptr [[L_ADDR]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP33]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK1: .omp.lastprivate.done:
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -400,27 +443,33 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META17:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -430,138 +479,154 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META17]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr
-// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr
-// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr
-// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP25]]
+// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4), !llvm.access.group [[ACC_GRP18]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]]
// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK1: cond.true10:
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK1-NEXT: br label [[COND_END12:%.*]]
// CHECK1: cond.false11:
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK1-NEXT: br label [[COND_END12]]
// CHECK1: cond.end12:
// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ]
-// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
-// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
// CHECK1-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
-// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0
// CHECK1-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1
// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD16]], ptr [[I3]], align 4
+// CHECK1-NEXT: store i32 [[ADD16]], ptr [[I3_ASCAST]], align 4
// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -572,102 +637,118 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META17]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]]
// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP28]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP21]]
// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]]
// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP21]]
// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP14]] to i32
// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1
// CHECK1-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16
-// CHECK1-NEXT: store i16 [[CONV9]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK1-NEXT: store i16 [[CONV9]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP21]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
-// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB11:%.*]] = sub nsw i32 [[TMP21]], 0
// CHECK1-NEXT: [[DIV12:%.*]] = sdiv i32 [[SUB11]], 1
// CHECK1-NEXT: [[MUL13:%.*]] = mul nsw i32 [[DIV12]], 1
// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 0, [[MUL13]]
-// CHECK1-NEXT: store i32 [[ADD14]], ptr [[I4]], align 4
+// CHECK1-NEXT: store i32 [[ADD14]], ptr [[I4_ASCAST]], align 4
// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -678,21 +759,25 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -702,98 +787,109 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]]
// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr
-// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP31]]
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8, !llvm.access.group [[ACC_GRP24]]
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr
-// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP31]]
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP31]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3), !llvm.access.group [[ACC_GRP31]]
+// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP24]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP24]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3), !llvm.access.group [[ACC_GRP24]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9
// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK1: cond.true5:
// CHECK1-NEXT: br label [[COND_END7:%.*]]
// CHECK1: cond.false6:
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK1-NEXT: br label [[COND_END7]]
// CHECK1: cond.end7:
// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ]
-// CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0
// CHECK1-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
-// CHECK1-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK1-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: ret void
@@ -802,75 +898,87 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]]
// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP34]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP27]]
// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP27]]
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP27]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP27]]
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]]
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP27]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP27]]
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP27]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
-// CHECK1-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK1-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: ret void
@@ -879,27 +987,33 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK1-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -909,111 +1023,127 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK1-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]]
// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[F_CASTED]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED]], align 8, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[F_CASTED_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED_ASCAST]], align 8, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr
-// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr
-// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK1-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr
-// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !llvm.access.group [[ACC_GRP37]]
+// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4), !llvm.access.group [[ACC_GRP30]]
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]]
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]]
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
-// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99
// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]]
// CHECK1: cond.true6:
// CHECK1-NEXT: br label [[COND_END8:%.*]]
// CHECK1: cond.false7:
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
// CHECK1-NEXT: br label [[COND_END8]]
// CHECK1: cond.end8:
// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ]
-// CHECK1-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]]
+// CHECK1-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
// CHECK1-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
-// CHECK1-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK1-NEXT: store i32 10, ptr [[J]], align 4
+// CHECK1-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: store i32 10, ptr [[J_ASCAST]], align 4
// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: ret void
@@ -1022,99 +1152,115 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]]
// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8, !llvm.access.group [[ACC_GRP40]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP33]]
// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP10]], 10
// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL5]]
// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK1-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK1-NEXT: store i32 10, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], [[MUL8]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP14]]
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64
// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
-// CHECK1-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK1-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]]
+// CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK1: .omp.final.then:
-// CHECK1-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK1-NEXT: store i32 10, ptr [[J]], align 4
+// CHECK1-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: store i32 10, ptr [[J_ASCAST]], align 4
// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK1: .omp.final.done:
// CHECK1-NEXT: ret void
@@ -1123,33 +1269,41 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[L_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1159,152 +1313,170 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 128)
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: store i32 [[TMP18]], ptr [[L_CASTED]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[L_CASTED]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: store i32 [[TMP16]], ptr [[N_CASTED_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[L_CASTED_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[L_CASTED_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to ptr
-// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to ptr
-// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to ptr
-// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4
+// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 3
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 4
// CHECK2-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP19]] to ptr
-// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5), !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 5), !llvm.access.group [[ACC_GRP10]]
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]]
// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK2: cond.true10:
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK2-NEXT: br label [[COND_END12:%.*]]
// CHECK2: cond.false11:
-// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK2-NEXT: br label [[COND_END12]]
// CHECK2: cond.end12:
// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE10]] ], [ [[TMP40]], [[COND_FALSE11]] ]
-// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]])
-// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0
// CHECK2-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK2: .omp.final.then:
-// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP46:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP46]], 0
// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1
// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD16]], ptr [[I3]], align 4
+// CHECK2-NEXT: store i32 [[ADD16]], ptr [[I3_ASCAST]], align 4
// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK2: .omp.final.done:
-// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0
// CHECK2-NEXT: br i1 [[TMP48]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK2: .omp.lastprivate.then:
-// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP49]], ptr [[L_ADDR]], align 4
+// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP49]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK2: .omp.lastprivate.done:
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -1315,135 +1487,152 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 noundef [[L:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[L]], ptr [[L_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 32)
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 32)
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK2: omp.dispatch.cond:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK2: omp.dispatch.body:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14:![0-9]+]]
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]]
// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP19]]
-// CHECK2-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK2-NEXT: store i32 [[TMP20]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK2-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK2-NEXT: store i32 [[TMP20]], ptr [[L_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP14]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK2: omp.dispatch.inc:
-// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
-// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
+// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK2: omp.dispatch.end:
-// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP27]])
-// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0
// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK2: .omp.final.then:
-// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP30]], 0
// CHECK2-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
// CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
-// CHECK2-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
+// CHECK2-NEXT: store i32 [[ADD13]], ptr [[I3_ASCAST]], align 4
// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK2: .omp.final.done:
-// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0
// CHECK2-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK2: .omp.lastprivate.then:
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP33]], ptr [[L_ADDR]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP33]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK2: .omp.lastprivate.done:
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -1454,27 +1643,33 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META17:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1484,136 +1679,152 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META17]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: store i32 [[TMP16]], ptr [[N_CASTED_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to ptr
-// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to ptr
-// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to ptr
-// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), !llvm.access.group [[ACC_GRP25]]
+// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 3
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 4), !llvm.access.group [[ACC_GRP18]]
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
-// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]]
// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK2: cond.true10:
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK2-NEXT: br label [[COND_END12:%.*]]
// CHECK2: cond.false11:
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK2-NEXT: br label [[COND_END12]]
// CHECK2: cond.end12:
// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE10]] ], [ [[TMP36]], [[COND_FALSE11]] ]
-// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP39]])
-// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
// CHECK2-NEXT: br i1 [[TMP41]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK2: .omp.final.then:
-// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP42]], 0
// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1
// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD16]], ptr [[I3]], align 4
+// CHECK2-NEXT: store i32 [[ADD16]], ptr [[I3_ASCAST]], align 4
// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK2: .omp.final.done:
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -1624,98 +1835,114 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META17]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]]
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]]
// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP13]]
-// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP21]]
// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32
// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1
// CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16
-// CHECK2-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP28]]
+// CHECK2-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP21]]
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK2: .omp.final.then:
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0
// CHECK2-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1
// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1
// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]]
-// CHECK2-NEXT: store i32 [[ADD11]], ptr [[I3]], align 4
+// CHECK2-NEXT: store i32 [[ADD11]], ptr [[I3_ASCAST]], align 4
// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK2: .omp.final.done:
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -1726,21 +1953,25 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1750,96 +1981,107 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]]
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24:![0-9]+]]
// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10
// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to ptr
-// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3), !llvm.access.group [[ACC_GRP31]]
+// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 3), !llvm.access.group [[ACC_GRP24]]
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK2: cond.true5:
// CHECK2-NEXT: br label [[COND_END7:%.*]]
// CHECK2: cond.false6:
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
// CHECK2-NEXT: br label [[COND_END7]]
// CHECK2: cond.end7:
// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK2-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK2-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]]
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]]
+// CHECK2-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK2-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP24]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[TMP24:%.*]] = icmp ne i32 [[TMP23]], 0
// CHECK2-NEXT: br i1 [[TMP24]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK2: .omp.final.then:
-// CHECK2-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK2-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK2: .omp.final.done:
// CHECK2-NEXT: ret void
@@ -1848,71 +2090,83 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34:![0-9]+]]
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP27:![0-9]+]]
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP27]]
// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP27]]
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP27]]
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP27]]
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]]
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]]
// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1
-// CHECK2-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK2-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP27]]
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP34]]
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP27]]
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP27]]
// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP34]]
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]]
+// CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP27]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0
// CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK2: .omp.final.then:
-// CHECK2-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK2-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK2: .omp.final.done:
// CHECK2-NEXT: ret void
@@ -1921,27 +2175,33 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK2-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1951,109 +2211,125 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK2-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]]
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30:![0-9]+]]
// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100
// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: store i32 [[TMP9]], ptr [[F_CASTED]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[F_CASTED]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[F_CASTED_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[F_CASTED_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to ptr
-// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to ptr
-// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
+// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 3
// CHECK2-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP10]] to ptr
-// CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4), !llvm.access.group [[ACC_GRP37]]
+// CHECK2-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 4), !llvm.access.group [[ACC_GRP30]]
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]]
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]]
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP37]]
+// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
+// CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], 99
// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]]
// CHECK2: cond.true6:
// CHECK2-NEXT: br label [[COND_END8:%.*]]
// CHECK2: cond.false7:
-// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
// CHECK2-NEXT: br label [[COND_END8]]
// CHECK2: cond.end8:
// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP25]], [[COND_FALSE7]] ]
-// CHECK2-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]]
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]]
+// CHECK2-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP30]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
// CHECK2-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK2: .omp.final.then:
-// CHECK2-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK2-NEXT: store i32 10, ptr [[J]], align 4
+// CHECK2-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: store i32 10, ptr [[J_ASCAST]], align 4
// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK2: .omp.final.done:
// CHECK2-NEXT: ret void
@@ -2062,94 +2338,110 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]]
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]]
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10
// CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]]
// CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
-// CHECK2-NEXT: store i32 [[ADD5]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK2-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK2-NEXT: store i32 [[ADD5]], ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK2-NEXT: store i32 10, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]]
// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]]
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]]
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP15]]
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP16]]
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK2-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !llvm.access.group [[ACC_GRP40]]
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]]
-// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]]
+// CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK2: .omp.final.then:
-// CHECK2-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK2-NEXT: store i32 10, ptr [[J]], align 4
+// CHECK2-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: store i32 10, ptr [[J_ASCAST]], align 4
// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK2: .omp.final.done:
// CHECK2-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
index 4fc93386236a3..87e155180f154 100644
--- a/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_distribute_simd_codegen.cpp
@@ -70,33 +70,41 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
// CHECK45-64-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-64-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-64-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK45-64-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK45-64-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK45-64-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK45-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[L]], ptr [[L_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
-// CHECK45-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[L_CASTED_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED_ASCAST]], align 8
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
@@ -106,128 +114,143 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined
// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-64-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-64-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK45-64-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK45-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK45-64-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[L]], ptr [[L_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
+// CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK45-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK45-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-64-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK45-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK45-64: omp.precond.then:
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK45-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
+// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 128)
// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK45-64: omp.dispatch.cond:
-// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK45-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK45-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK45-64: cond.true:
-// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[COND_END:%.*]]
// CHECK45-64: cond.false:
-// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[COND_END]]
// CHECK45-64: cond.end:
// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
-// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK45-64-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK45-64: omp.dispatch.body:
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-64: omp.inner.for.cond:
-// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
-// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK45-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-64: omp.inner.for.body:
-// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK45-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK45-64-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-64-NEXT: store i32 [[TMP18]], ptr [[L_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-64: omp.body.continue:
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-64: omp.inner.for.inc:
-// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-64-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK45-64-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK45-64-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK45-64: omp.inner.for.end:
// CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK45-64: omp.dispatch.inc:
-// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK45-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK45-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
+// CHECK45-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK45-64: omp.dispatch.end:
-// CHECK45-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK45-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK45-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
-// CHECK45-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK45-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK45-64: .omp.final.then:
-// CHECK45-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-64-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
// CHECK45-64-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
// CHECK45-64-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
// CHECK45-64-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
-// CHECK45-64-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
+// CHECK45-64-NEXT: store i32 [[ADD13]], ptr [[I3_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK45-64: .omp.final.done:
-// CHECK45-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-64-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
// CHECK45-64-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK45-64: .omp.lastprivate.then:
-// CHECK45-64-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4
+// CHECK45-64-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP31]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK45-64: .omp.lastprivate.done:
// CHECK45-64-NEXT: br label [[OMP_PRECOND_END]]
@@ -238,27 +261,33 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
// CHECK45-64-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-64-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-64-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-64-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK45-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META14:![0-9]+]]
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
@@ -268,121 +297,135 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined
// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-64-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-64-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK45-64-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK45-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK45-64-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META14]]
+// CHECK45-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK45-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK45-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-64-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK45-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK45-64: omp.precond.then:
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK45-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK45-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK45-64: omp.dispatch.cond:
-// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK45-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK45-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK45-64: cond.true:
-// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[COND_END:%.*]]
// CHECK45-64: cond.false:
-// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[COND_END]]
// CHECK45-64: cond.end:
// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
-// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK45-64-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK45-64: omp.dispatch.body:
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-64: omp.inner.for.cond:
-// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
-// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK45-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-64: omp.inner.for.body:
-// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK45-64-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-64-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK45-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32
// CHECK45-64-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1
// CHECK45-64-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16
-// CHECK45-64-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-64-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-64: omp.body.continue:
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-64: omp.inner.for.inc:
-// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK45-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK45-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK45-64: omp.inner.for.end:
// CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK45-64: omp.dispatch.inc:
-// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-64-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK45-64-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4
+// CHECK45-64-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK45-64: omp.dispatch.end:
-// CHECK45-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK45-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK45-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
-// CHECK45-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK45-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK45-64: .omp.final.then:
-// CHECK45-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-64-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0
// CHECK45-64-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK45-64-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
// CHECK45-64-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
-// CHECK45-64-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4
+// CHECK45-64-NEXT: store i32 [[ADD15]], ptr [[I3_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK45-64: .omp.final.done:
// CHECK45-64-NEXT: br label [[OMP_PRECOND_END]]
@@ -393,21 +436,25 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45
// CHECK45-64-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-64-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
@@ -417,92 +464,102 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined
// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-64-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK45-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK45-64: omp.dispatch.cond:
-// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
// CHECK45-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK45-64: cond.true:
// CHECK45-64-NEXT: br label [[COND_END:%.*]]
// CHECK45-64: cond.false:
-// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[COND_END]]
// CHECK45-64: cond.end:
// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
-// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK45-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK45-64: omp.dispatch.body:
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-64: omp.inner.for.cond:
-// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
-// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
+// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK45-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-64: omp.inner.for.body:
-// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1
-// CHECK45-64-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-64-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-64: omp.body.continue:
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-64: omp.inner.for.inc:
-// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK45-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK45-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK45-64: omp.inner.for.end:
// CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK45-64: omp.dispatch.inc:
-// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK45-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK45-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4
+// CHECK45-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK45-64: omp.dispatch.end:
// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK45-64-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK45-64: .omp.final.then:
-// CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK45-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK45-64: .omp.final.done:
// CHECK45-64-NEXT: ret void
@@ -511,27 +568,33 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50
// CHECK45-64-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-64-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK45-64-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK45-64-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK45-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK45-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
// CHECK45-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-64: user_code.entry:
// CHECK45-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
-// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
+// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP3]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED_ASCAST]], align 8
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK45-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK45-64-NEXT: call void @__kmpc_target_deinit()
// CHECK45-64-NEXT: ret void
// CHECK45-64: worker.exit:
@@ -541,116 +604,130 @@ int bar(int n){
// CHECK45-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined
// CHECK45-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR1]] {
// CHECK45-64-NEXT: entry:
-// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK45-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK45-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK45-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-64-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK45-64-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-64-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK45-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK45-64-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK45-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-64-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK45-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK45-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK45-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK45-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK45-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK45-64: omp.dispatch.cond:
-// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
// CHECK45-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK45-64: cond.true:
// CHECK45-64-NEXT: br label [[COND_END:%.*]]
// CHECK45-64: cond.false:
-// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[COND_END]]
// CHECK45-64: cond.end:
// CHECK45-64-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
-// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK45-64-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK45-64: omp.dispatch.body:
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-64: omp.inner.for.cond:
-// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
-// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]]
+// CHECK45-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-64-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK45-64-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-64: omp.inner.for.body:
-// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-64-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10
// CHECK45-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK45-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-64-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10
// CHECK45-64-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
// CHECK45-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
// CHECK45-64-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK45-64-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK45-64-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-64-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-64-NEXT: store i32 [[ADD7]], ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-64-NEXT: store i32 10, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-64-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]]
// CHECK45-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]]
-// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]]
-// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
// CHECK45-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-64-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP18]] to i64
// CHECK45-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
-// CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-64-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-64: omp.body.continue:
// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-64: omp.inner.for.inc:
-// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK45-64-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK45-64-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK45-64: omp.inner.for.end:
// CHECK45-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK45-64: omp.dispatch.inc:
-// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-64-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK45-64-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_LB]], align 4
-// CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-64-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-64-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK45-64-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_UB]], align 4
+// CHECK45-64-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK45-64: omp.dispatch.end:
// CHECK45-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK45-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
// CHECK45-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK45-64: .omp.final.then:
-// CHECK45-64-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK45-64-NEXT: store i32 10, ptr [[J]], align 4
+// CHECK45-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
+// CHECK45-64-NEXT: store i32 10, ptr [[J_ASCAST]], align 4
// CHECK45-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK45-64: .omp.final.done:
// CHECK45-64-NEXT: ret void
@@ -659,33 +736,41 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
// CHECK45-32-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-32-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK45-32-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK45-32-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK45-32-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK45-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[L]], ptr [[L_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
-// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[L_CASTED_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
@@ -695,127 +780,142 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined
// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-32-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-32-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK45-32-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK45-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK45-32-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[L]], ptr [[L_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK45-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK45-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-32-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK45-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK45-32: omp.precond.then:
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK45-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
+// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 128)
// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK45-32: omp.dispatch.cond:
-// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK45-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK45-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK45-32: cond.true:
-// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[COND_END:%.*]]
// CHECK45-32: cond.false:
-// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[COND_END]]
// CHECK45-32: cond.end:
// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK45-32-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK45-32: omp.dispatch.body:
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32: omp.inner.for.cond:
-// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
-// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK45-32-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32: omp.inner.for.body:
-// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]]
-// CHECK45-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK45-32-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-32-NEXT: store i32 [[TMP18]], ptr [[L_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32: omp.body.continue:
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32: omp.inner.for.inc:
-// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK45-32-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK45-32-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK45-32: omp.inner.for.end:
// CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK45-32: omp.dispatch.inc:
-// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK45-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK45-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK45-32: omp.dispatch.end:
-// CHECK45-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK45-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK45-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
-// CHECK45-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK45-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK45-32: .omp.final.then:
-// CHECK45-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
// CHECK45-32-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
// CHECK45-32-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
// CHECK45-32-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
-// CHECK45-32-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
+// CHECK45-32-NEXT: store i32 [[ADD13]], ptr [[I3_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK45-32: .omp.final.done:
-// CHECK45-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
// CHECK45-32-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK45-32: .omp.lastprivate.then:
-// CHECK45-32-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4
+// CHECK45-32-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP31]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK45-32: .omp.lastprivate.done:
// CHECK45-32-NEXT: br label [[OMP_PRECOND_END]]
@@ -826,27 +926,33 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
// CHECK45-32-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-32-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-32-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK45-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META14:![0-9]+]]
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
@@ -856,120 +962,134 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined
// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-32-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-32-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK45-32-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK45-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK45-32-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META14]]
+// CHECK45-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK45-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK45-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-32-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK45-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK45-32: omp.precond.then:
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK45-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK45-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK45-32: omp.dispatch.cond:
-// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK45-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK45-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK45-32: cond.true:
-// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[COND_END:%.*]]
// CHECK45-32: cond.false:
-// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[COND_END]]
// CHECK45-32: cond.end:
// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK45-32-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK45-32: omp.dispatch.body:
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32: omp.inner.for.cond:
-// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
-// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK45-32-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32: omp.inner.for.body:
-// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]]
-// CHECK45-32-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-32-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32
// CHECK45-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1
// CHECK45-32-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16
-// CHECK45-32-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-32-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32: omp.body.continue:
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32: omp.inner.for.inc:
-// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK45-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK45-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK45-32: omp.inner.for.end:
// CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK45-32: omp.dispatch.inc:
-// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK45-32-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK45-32: omp.dispatch.end:
-// CHECK45-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK45-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK45-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
-// CHECK45-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK45-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK45-32: .omp.final.then:
-// CHECK45-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0
// CHECK45-32-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK45-32-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
// CHECK45-32-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
-// CHECK45-32-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4
+// CHECK45-32-NEXT: store i32 [[ADD15]], ptr [[I3_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK45-32: .omp.final.done:
// CHECK45-32-NEXT: br label [[OMP_PRECOND_END]]
@@ -980,21 +1100,25 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45
// CHECK45-32-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
@@ -1004,91 +1128,101 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined
// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-32-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK45-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK45-32: omp.dispatch.cond:
-// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
// CHECK45-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK45-32: cond.true:
// CHECK45-32-NEXT: br label [[COND_END:%.*]]
// CHECK45-32: cond.false:
-// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[COND_END]]
// CHECK45-32: cond.end:
// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK45-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK45-32: omp.dispatch.body:
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32: omp.inner.for.cond:
-// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
-// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
+// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK45-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32: omp.inner.for.body:
-// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]]
-// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1
-// CHECK45-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32: omp.body.continue:
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32: omp.inner.for.inc:
-// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK45-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK45-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK45-32: omp.inner.for.end:
// CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK45-32: omp.dispatch.inc:
-// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK45-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK45-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK45-32: omp.dispatch.end:
// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK45-32-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK45-32: .omp.final.then:
-// CHECK45-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK45-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK45-32: .omp.final.done:
// CHECK45-32-NEXT: ret void
@@ -1097,27 +1231,33 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50
// CHECK45-32-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK45-32-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK45-32-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK45-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK45-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK45-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32: user_code.entry:
// CHECK45-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
-// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
+// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP3]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK45-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK45-32-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-NEXT: ret void
// CHECK45-32: worker.exit:
@@ -1127,114 +1267,128 @@ int bar(int n){
// CHECK45-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined
// CHECK45-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] {
// CHECK45-32-NEXT: entry:
-// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK45-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-32-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK45-32-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK45-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK45-32-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK45-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK45-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK45-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK45-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK45-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK45-32: omp.dispatch.cond:
-// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
// CHECK45-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK45-32: cond.true:
// CHECK45-32-NEXT: br label [[COND_END:%.*]]
// CHECK45-32: cond.false:
-// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[COND_END]]
// CHECK45-32: cond.end:
// CHECK45-32-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK45-32-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK45-32: omp.dispatch.body:
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32: omp.inner.for.cond:
-// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
-// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]]
+// CHECK45-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK45-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32: omp.inner.for.body:
-// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10
// CHECK45-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK45-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10
// CHECK45-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
// CHECK45-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
// CHECK45-32-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK45-32-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK45-32-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-NEXT: store i32 [[ADD7]], ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-NEXT: store i32 10, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]]
// CHECK45-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]]
-// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]]
-// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]]
-// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]]
-// CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32: omp.body.continue:
// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32: omp.inner.for.inc:
-// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK45-32-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK45-32-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK45-32: omp.inner.for.end:
// CHECK45-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK45-32: omp.dispatch.inc:
-// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK45-32-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK45-32-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK45-32: omp.dispatch.end:
// CHECK45-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK45-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
// CHECK45-32-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK45-32: .omp.final.then:
-// CHECK45-32-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK45-32-NEXT: store i32 10, ptr [[J]], align 4
+// CHECK45-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
+// CHECK45-32-NEXT: store i32 10, ptr [[J_ASCAST]], align 4
// CHECK45-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK45-32: .omp.final.done:
// CHECK45-32-NEXT: ret void
@@ -1243,33 +1397,41 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
// CHECK45-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK45-32-EX-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
-// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[L_CASTED_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
@@ -1279,127 +1441,142 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined
// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK45-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK45-32-EX-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK45-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK45-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK45-32-EX: omp.precond.then:
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
+// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 128)
// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK45-32-EX: omp.dispatch.cond:
-// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK45-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK45-32-EX: cond.true:
-// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK45-32-EX: cond.false:
-// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[COND_END]]
// CHECK45-32-EX: cond.end:
// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK45-32-EX-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK45-32-EX: omp.dispatch.body:
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32-EX: omp.inner.for.cond:
-// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
-// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK45-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32-EX: omp.inner.for.body:
-// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]]
-// CHECK45-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK45-32-EX-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-32-EX-NEXT: store i32 [[TMP18]], ptr [[L_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32-EX: omp.body.continue:
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32-EX: omp.inner.for.inc:
-// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK45-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK45-32-EX-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK45-32-EX: omp.inner.for.end:
// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK45-32-EX: omp.dispatch.inc:
-// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK45-32-EX: omp.dispatch.end:
-// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
-// CHECK45-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK45-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK45-32-EX: .omp.final.then:
-// CHECK45-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
// CHECK45-32-EX-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
// CHECK45-32-EX-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
// CHECK45-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[ADD13]], ptr [[I3_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK45-32-EX: .omp.final.done:
-// CHECK45-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
// CHECK45-32-EX-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK45-32-EX: .omp.lastprivate.then:
-// CHECK45-32-EX-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP31]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK45-32-EX: .omp.lastprivate.done:
// CHECK45-32-EX-NEXT: br label [[OMP_PRECOND_END]]
@@ -1410,27 +1587,33 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
// CHECK45-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META14:![0-9]+]]
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
@@ -1440,120 +1623,134 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined
// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK45-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK45-32-EX-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META14]]
+// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK45-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK45-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK45-32-EX: omp.precond.then:
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK45-32-EX: omp.dispatch.cond:
-// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK45-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK45-32-EX: cond.true:
-// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK45-32-EX: cond.false:
-// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[COND_END]]
// CHECK45-32-EX: cond.end:
// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK45-32-EX-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK45-32-EX: omp.dispatch.body:
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32-EX: omp.inner.for.cond:
-// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
-// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK45-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32-EX: omp.inner.for.body:
-// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]]
-// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32
// CHECK45-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1
// CHECK45-32-EX-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16
-// CHECK45-32-EX-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-32-EX-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32-EX: omp.body.continue:
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32-EX: omp.inner.for.inc:
-// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK45-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK45-32-EX: omp.inner.for.end:
// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK45-32-EX: omp.dispatch.inc:
-// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK45-32-EX: omp.dispatch.end:
-// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
-// CHECK45-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK45-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK45-32-EX: .omp.final.then:
-// CHECK45-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK45-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0
// CHECK45-32-EX-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK45-32-EX-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
// CHECK45-32-EX-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[ADD15]], ptr [[I3_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK45-32-EX: .omp.final.done:
// CHECK45-32-EX-NEXT: br label [[OMP_PRECOND_END]]
@@ -1564,21 +1761,25 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45
// CHECK45-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
@@ -1588,91 +1789,101 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined
// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK45-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK45-32-EX: omp.dispatch.cond:
-// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK45-32-EX: cond.true:
// CHECK45-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK45-32-EX: cond.false:
-// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[COND_END]]
// CHECK45-32-EX: cond.end:
// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK45-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK45-32-EX: omp.dispatch.body:
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32-EX: omp.inner.for.cond:
-// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
-// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
+// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK45-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32-EX: omp.inner.for.body:
-// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]]
-// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1
-// CHECK45-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32-EX: omp.body.continue:
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32-EX: omp.inner.for.inc:
-// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK45-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK45-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK45-32-EX: omp.inner.for.end:
// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK45-32-EX: omp.dispatch.inc:
-// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK45-32-EX: omp.dispatch.end:
// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK45-32-EX-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK45-32-EX: .omp.final.then:
-// CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK45-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK45-32-EX: .omp.final.done:
// CHECK45-32-EX-NEXT: ret void
@@ -1681,27 +1892,33 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50
// CHECK45-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment, ptr [[DYN_PTR]])
// CHECK45-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK45-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK45-32-EX: user_code.entry:
// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
-// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
+// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP3]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK45-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK45-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK45-32-EX-NEXT: ret void
// CHECK45-32-EX: worker.exit:
@@ -1711,114 +1928,128 @@ int bar(int n){
// CHECK45-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined
// CHECK45-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] {
// CHECK45-32-EX-NEXT: entry:
-// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK45-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK45-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK45-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK45-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK45-32-EX-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK45-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK45-32-EX-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK45-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK45-32-EX-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK45-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK45-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK45-32-EX: omp.dispatch.cond:
-// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
// CHECK45-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK45-32-EX: cond.true:
// CHECK45-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK45-32-EX: cond.false:
-// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[COND_END]]
// CHECK45-32-EX: cond.end:
// CHECK45-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK45-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK45-32-EX: omp.dispatch.body:
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK45-32-EX: omp.inner.for.cond:
-// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
-// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]]
+// CHECK45-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-EX-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK45-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK45-32-EX: omp.inner.for.body:
-// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10
// CHECK45-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK45-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-EX-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10
// CHECK45-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
// CHECK45-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
// CHECK45-32-EX-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK45-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-EX-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD7]], ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-EX-NEXT: store i32 10, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-EX-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]]
// CHECK45-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]]
-// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]]
-// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]]
-// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-EX-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK45-32-EX: omp.body.continue:
// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK45-32-EX: omp.inner.for.inc:
-// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK45-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK45-32-EX-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK45-32-EX-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK45-32-EX-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK45-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK45-32-EX: omp.inner.for.end:
// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK45-32-EX: omp.dispatch.inc:
-// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK45-32-EX-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4
+// CHECK45-32-EX-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK45-32-EX: omp.dispatch.end:
// CHECK45-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK45-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK45-32-EX-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
// CHECK45-32-EX-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK45-32-EX: .omp.final.then:
-// CHECK45-32-EX-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK45-32-EX-NEXT: store i32 10, ptr [[J]], align 4
+// CHECK45-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
+// CHECK45-32-EX-NEXT: store i32 10, ptr [[J_ASCAST]], align 4
// CHECK45-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK45-32-EX: .omp.final.done:
// CHECK45-32-EX-NEXT: ret void
@@ -1827,33 +2058,41 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
// CHECK-64-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-64-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-64-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK-64-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK-64-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[L]], ptr [[L_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[L_CASTED_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, ptr [[L_CASTED_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -1863,128 +2102,143 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined
// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[L]], ptr [[L_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-64-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-64-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[L]], ptr [[L_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-64: omp.precond.then:
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 128)
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK-64-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK-64-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-64-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-64-NEXT: store i32 [[TMP18]], ptr [[L_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-64-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK-64-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
-// CHECK-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
-// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-64-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
// CHECK-64-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
// CHECK-64-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
// CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
-// CHECK-64-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
+// CHECK-64-NEXT: store i32 [[ADD13]], ptr [[I3_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
-// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
// CHECK-64-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK-64: .omp.lastprivate.then:
-// CHECK-64-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4
+// CHECK-64-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP31]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK-64: .omp.lastprivate.done:
// CHECK-64-NEXT: br label [[OMP_PRECOND_END]]
@@ -1995,27 +2249,33 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
// CHECK-64-SAME: (ptr noalias [[DYN_PTR:%.*]], i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-64-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-64-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META14:![0-9]+]]
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -2025,121 +2285,135 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined
// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-64-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK-64-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META14]]
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-64-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-64-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK-64-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-64: omp.precond.then:
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK-64-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK-64-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-64-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK-64-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK-64-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32
// CHECK-64-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1
// CHECK-64-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16
-// CHECK-64-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
+// CHECK-64-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK-64-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
-// CHECK-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
-// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK-64-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-64-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-64-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0
// CHECK-64-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK-64-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
// CHECK-64-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
-// CHECK-64-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4
+// CHECK-64-NEXT: store i32 [[ADD15]], ptr [[I3_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: br label [[OMP_PRECOND_END]]
@@ -2150,21 +2424,25 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45
// CHECK-64-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -2174,92 +2452,102 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined
// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK-64-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-64-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1
-// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-64-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK-64-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -2268,27 +2556,33 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50
// CHECK-64-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK-64-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-64-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK-64-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK-64-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK-64-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-64-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
// CHECK-64-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment, ptr [[DYN_PTR]])
// CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-64: user_code.entry:
// CHECK-64-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP3]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED_ASCAST]], align 8
+// CHECK-64-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK-64-NEXT: call void @__kmpc_target_deinit()
// CHECK-64-NEXT: ret void
// CHECK-64: worker.exit:
@@ -2298,116 +2592,130 @@ int bar(int n){
// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined
// CHECK-64-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR1]] {
// CHECK-64-NEXT: entry:
-// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK-64-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-64-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-64-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-64-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK-64-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-64-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-64-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-64-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-64-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK-64-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-64-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK-64-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META8]], !align [[META9]]
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-64-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-64: omp.dispatch.cond:
-// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
// CHECK-64-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-64: cond.true:
// CHECK-64-NEXT: br label [[COND_END:%.*]]
// CHECK-64: cond.false:
-// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[COND_END]]
// CHECK-64: cond.end:
// CHECK-64-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK-64-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-64: omp.dispatch.body:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-64: omp.inner.for.cond:
-// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
-// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]]
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-64-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-64-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-64: omp.inner.for.body:
-// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-64-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10
// CHECK-64-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-64-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-64-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-64-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10
// CHECK-64-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
// CHECK-64-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
// CHECK-64-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK-64-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK-64-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-64-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-64-NEXT: store i32 [[ADD7]], ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-64-NEXT: store i32 10, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-64-NEXT: [[TMP14:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-64-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-64-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]]
// CHECK-64-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]]
-// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-64-NEXT: [[TMP16:%.*]] = load i32, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-64-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]]
-// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-64-NEXT: [[TMP17:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64
// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-64-NEXT: [[TMP18:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-64-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP18]] to i64
// CHECK-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
-// CHECK-64-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-64-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-64-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-64: omp.body.continue:
// CHECK-64-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-64: omp.inner.for.inc:
-// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-64-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-64-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK-64-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK-64-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-64-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK-64: omp.inner.for.end:
// CHECK-64-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-64: omp.dispatch.inc:
-// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK-64-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_LB]], align 4
-// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-64-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-64-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK-64-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_UB]], align 4
+// CHECK-64-NEXT: store i32 [[ADD15]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-64-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-64: omp.dispatch.end:
// CHECK-64-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-64-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-64-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
// CHECK-64-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-64: .omp.final.then:
-// CHECK-64-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK-64-NEXT: store i32 10, ptr [[J]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
+// CHECK-64-NEXT: store i32 10, ptr [[J_ASCAST]], align 4
// CHECK-64-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-64: .omp.final.done:
// CHECK-64-NEXT: ret void
@@ -2416,33 +2724,41 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
// CHECK-32-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-32-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK-32-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK-32-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[L]], ptr [[L_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[L_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -2452,127 +2768,142 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined
// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-32-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[L]], ptr [[L_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-32: omp.precond.then:
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 128)
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK-32-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]]
-// CHECK-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK-32-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-32-NEXT: store i32 [[TMP18]], ptr [[L_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK-32-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
-// CHECK-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
-// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
// CHECK-32-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
// CHECK-32-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
// CHECK-32-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
-// CHECK-32-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
+// CHECK-32-NEXT: store i32 [[ADD13]], ptr [[I3_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
-// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
// CHECK-32-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK-32: .omp.lastprivate.then:
-// CHECK-32-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4
+// CHECK-32-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP31]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK-32: .omp.lastprivate.done:
// CHECK-32-NEXT: br label [[OMP_PRECOND_END]]
@@ -2583,27 +2914,33 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
// CHECK-32-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-32-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-32-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META14:![0-9]+]]
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -2613,120 +2950,134 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined
// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-32-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK-32-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META14]]
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-32-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK-32-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-32: omp.precond.then:
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK-32-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK-32-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK-32-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32
// CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1
// CHECK-32-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16
-// CHECK-32-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
+// CHECK-32-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK-32-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
-// CHECK-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
-// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK-32-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0
// CHECK-32-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK-32-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
// CHECK-32-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
-// CHECK-32-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4
+// CHECK-32-NEXT: store i32 [[ADD15]], ptr [[I3_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: br label [[OMP_PRECOND_END]]
@@ -2737,21 +3088,25 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45
// CHECK-32-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -2761,91 +3116,101 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined
// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK-32-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]]
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1
-// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-32-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK-32-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -2854,27 +3219,33 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50
// CHECK-32-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+// CHECK-32-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK-32-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK-32-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK-32-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK-32-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32: user_code.entry:
// CHECK-32-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP3]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK-32-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-NEXT: ret void
// CHECK-32: worker.exit:
@@ -2884,114 +3255,128 @@ int bar(int n){
// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined
// CHECK-32-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] {
// CHECK-32-NEXT: entry:
-// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK-32-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK-32-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK-32-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK-32-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32: omp.dispatch.cond:
-// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
// CHECK-32-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32: cond.true:
// CHECK-32-NEXT: br label [[COND_END:%.*]]
// CHECK-32: cond.false:
-// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[COND_END]]
// CHECK-32: cond.end:
// CHECK-32-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK-32-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32: omp.dispatch.body:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32: omp.inner.for.cond:
-// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
-// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]]
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32: omp.inner.for.body:
-// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10
// CHECK-32-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10
// CHECK-32-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
// CHECK-32-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
// CHECK-32-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK-32-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK-32-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-NEXT: store i32 [[ADD7]], ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-NEXT: store i32 10, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-NEXT: [[TMP14:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]]
// CHECK-32-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]]
-// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-NEXT: [[TMP16:%.*]] = load i32, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]]
-// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-NEXT: [[TMP17:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]]
-// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]]
-// CHECK-32-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32: omp.body.continue:
// CHECK-32-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32: omp.inner.for.inc:
-// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK-32-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK-32-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK-32: omp.inner.for.end:
// CHECK-32-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32: omp.dispatch.inc:
-// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK-32-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK-32-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32: omp.dispatch.end:
// CHECK-32-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
// CHECK-32-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32: .omp.final.then:
-// CHECK-32-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK-32-NEXT: store i32 10, ptr [[J]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
+// CHECK-32-NEXT: store i32 10, ptr [[J_ASCAST]], align 4
// CHECK-32-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32: .omp.final.done:
// CHECK-32-NEXT: ret void
@@ -3000,33 +3385,41 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34
// CHECK-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK-32-EX-NEXT: [[L_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_CASTED]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META8:![0-9]+]], !align [[META9:![0-9]+]]
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[L_CASTED]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[L_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[L_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -3036,127 +3429,142 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[L_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[L_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[L]], ptr [[L_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-32-EX: omp.precond.then:
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 128)
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 128)
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK-32-EX-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK-32-EX-NEXT: store i32 [[TMP18]], ptr [[L_ADDR]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-EX-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-32-EX-NEXT: store i32 [[TMP18]], ptr [[L_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
// CHECK-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP18]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP10]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
-// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
-// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-EX-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP28]], 0
// CHECK-32-EX-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1
// CHECK-32-EX-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1
// CHECK-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]]
-// CHECK-32-EX-NEXT: store i32 [[ADD13]], ptr [[I3]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD13]], ptr [[I3_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
-// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
// CHECK-32-EX: .omp.lastprivate.then:
-// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP31]], ptr [[L_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP31:%.*]] = load i32, ptr [[L_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP31]], ptr [[L_ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
// CHECK-32-EX: .omp.lastprivate.done:
// CHECK-32-EX-NEXT: br label [[OMP_PRECOND_END]]
@@ -3167,27 +3575,33 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40
// CHECK-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META14:![0-9]+]]
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -3197,120 +3611,134 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l40_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], ptr nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK-32-EX-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META14]]
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK-32-EX-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-EX-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK-32-EX: omp.precond.then:
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK-32-EX-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK-32-EX-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-EX-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]]
// CHECK-32-EX-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP17]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-EX-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32
// CHECK-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV]], 1
// CHECK-32-EX-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i16
-// CHECK-32-EX-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP22]]
+// CHECK-32-EX-NEXT: store i16 [[CONV8]], ptr [[ARRAYIDX]], align 2, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP22]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP15]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK-32-EX-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
-// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP25]])
-// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP27]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK-32-EX-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK-32-EX-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP28]], 0
// CHECK-32-EX-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK-32-EX-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1
// CHECK-32-EX-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]]
-// CHECK-32-EX-NEXT: store i32 [[ADD15]], ptr [[I3]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD15]], ptr [[I3_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: br label [[OMP_PRECOND_END]]
@@ -3321,21 +3749,25 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45
// CHECK-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -3345,91 +3777,101 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l45_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK-32-EX-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP11]]
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-EX-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-32-EX-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
// CHECK-32-EX-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP13]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP25]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP18]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP19]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
@@ -3438,27 +3880,33 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50
// CHECK-32-EX-SAME: (ptr noalias [[DYN_PTR:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK-32-EX-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
// CHECK-32-EX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_kernel_environment, ptr [[DYN_PTR]])
// CHECK-32-EX-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK-32-EX-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK-32-EX: user_code.entry:
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP3]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-32-EX-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK-32-EX-NEXT: call void @__kmpc_target_deinit()
// CHECK-32-EX-NEXT: ret void
// CHECK-32-EX: worker.exit:
@@ -3468,114 +3916,128 @@ int bar(int n){
// CHECK-32-EX-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l50_omp_outlined
// CHECK-32-EX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR1]] {
// CHECK-32-EX-NEXT: entry:
-// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-32-EX-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-32-EX-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-32-EX-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-32-EX-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-32-EX-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK-32-EX-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-32-EX-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK-32-EX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META8]], !align [[META9]]
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK-32-EX-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK-32-EX: omp.dispatch.cond:
-// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
// CHECK-32-EX-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK-32-EX: cond.true:
// CHECK-32-EX-NEXT: br label [[COND_END:%.*]]
// CHECK-32-EX: cond.false:
-// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[COND_END]]
// CHECK-32-EX: cond.end:
// CHECK-32-EX-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]]
// CHECK-32-EX-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]]
// CHECK-32-EX: omp.dispatch.body:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK-32-EX: omp.inner.for.cond:
-// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28:![0-9]+]]
-// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-EX-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21:![0-9]+]]
+// CHECK-32-EX-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-EX-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]]
// CHECK-32-EX-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK-32-EX: omp.inner.for.body:
-// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-EX-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-EX-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 10
// CHECK-32-EX-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK-32-EX-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-EX-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-EX-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-EX-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-EX-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 10
// CHECK-32-EX-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
// CHECK-32-EX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]]
// CHECK-32-EX-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK-32-EX-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK-32-EX-NEXT: store i32 [[ADD7]], ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-EX-NEXT: store i32 10, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-EX-NEXT: store i32 [[ADD7]], ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-EX-NEXT: store i32 10, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-EX-NEXT: [[TMP13:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-EX-NEXT: [[TMP14:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-EX-NEXT: [[TMP15:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-EX-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP14]], [[TMP15]]
// CHECK-32-EX-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP13]], [[MUL8]]
-// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[K]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-EX-NEXT: [[TMP16:%.*]] = load i32, ptr [[K_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-EX-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP16]]
-// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-EX-NEXT: [[TMP17:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-EX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP17]]
-// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[J]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-EX-NEXT: [[TMP18:%.*]] = load i32, ptr [[J_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-EX-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP18]]
-// CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-EX-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX11]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-EX-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK-32-EX: omp.body.continue:
// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK-32-EX: omp.inner.for.inc:
-// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
+// CHECK-32-EX-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
// CHECK-32-EX-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1
-// CHECK-32-EX-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP28]]
-// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]]
+// CHECK-32-EX-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP21]]
+// CHECK-32-EX-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
// CHECK-32-EX: omp.inner.for.end:
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK-32-EX: omp.dispatch.inc:
-// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK-32-EX-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB]], align 4
-// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
-// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK-32-EX-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB]], align 4
+// CHECK-32-EX-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_UB_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK-32-EX: omp.dispatch.end:
// CHECK-32-EX-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-32-EX-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK-32-EX-NEXT: [[TMP25:%.*]] = icmp ne i32 [[TMP24]], 0
// CHECK-32-EX-NEXT: br i1 [[TMP25]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]]
// CHECK-32-EX: .omp.final.then:
-// CHECK-32-EX-NEXT: store i32 10, ptr [[I]], align 4
-// CHECK-32-EX-NEXT: store i32 10, ptr [[J]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[I_ASCAST]], align 4
+// CHECK-32-EX-NEXT: store i32 10, ptr [[J_ASCAST]], align 4
// CHECK-32-EX-NEXT: br label [[DOTOMP_FINAL_DONE]]
// CHECK-32-EX: .omp.final.done:
// CHECK-32-EX-NEXT: ret void
diff --git a/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
index c89c6eb65706a..0640327a07406 100644
--- a/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_generic_loop_codegen.cpp
@@ -68,27 +68,33 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9:![0-9]+]], !align [[META10:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -98,126 +104,142 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr
// CHECK1-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr
// CHECK1-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr
// CHECK1-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]]
// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK1: cond.true10:
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END12:%.*]]
// CHECK1: cond.false11:
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END12]]
// CHECK1: cond.end12:
// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ]
-// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -228,70 +250,86 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]]
// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4]], align 4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
@@ -303,15 +341,15 @@ int bar(int n){
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
-// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -322,29 +360,35 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
-// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1
+// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 1
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -354,90 +398,102 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10
// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr
// CHECK1-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr
// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9
// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK1: cond.true5:
// CHECK1-NEXT: br label [[COND_END7:%.*]]
// CHECK1: cond.false6:
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END7]]
// CHECK1: cond.end7:
// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ]
-// CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -449,52 +505,64 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
@@ -504,10 +572,10 @@ int bar(int n){
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -519,27 +587,33 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR4]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK1-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -549,100 +623,116 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK1-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100
// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[F_CASTED]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED]], align 8
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr
// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr
// CHECK1-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK1-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr
// CHECK1-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
-// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99
// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]]
// CHECK1: cond.true6:
// CHECK1-NEXT: br label [[COND_END8:%.*]]
// CHECK1: cond.false7:
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END8]]
// CHECK1: cond.end8:
// CHECK1-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ]
-// CHECK1-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -654,77 +744,93 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP10]], 10
// CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL5]]
// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
-// CHECK1-NEXT: store i32 10, ptr [[K]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[J_ASCAST]], align 4
+// CHECK1-NEXT: store i32 10, ptr [[K_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
// CHECK1-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], [[MUL8]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[K_ASCAST]], align 4
// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP14]]
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[J_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64
// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
// CHECK1-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4
@@ -732,10 +838,10 @@ int bar(int n){
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -747,27 +853,33 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR4]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -777,142 +889,162 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I9:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I9:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I9]] to ptr
+// CHECK1-NEXT: [[J10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J10]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0
// CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: land.lhs.true:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
// CHECK1-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]])
-// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 [[CONV11]])
+// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK1-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]]
// CHECK1-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK1-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1
// CHECK1-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]]
// CHECK1-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP19]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP19]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP17]] to ptr
// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr
// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr
// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP28]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP30]], [[TMP31]]
-// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP32]], [[TMP33]]
-// CHECK1-NEXT: store i64 [[ADD15]], ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK1-NEXT: store i64 [[ADD15]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP34]], [[TMP35]]
-// CHECK1-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK1-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK1-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP36]], [[TMP37]]
// CHECK1-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]]
// CHECK1: cond.true18:
-// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK1-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK1-NEXT: br label [[COND_END20:%.*]]
// CHECK1: cond.false19:
-// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// CHECK1-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
// CHECK1-NEXT: br label [[COND_END20]]
// CHECK1: cond.end20:
// CHECK1-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP38]], [[COND_TRUE18]] ], [ [[TMP39]], [[COND_FALSE19]] ]
-// CHECK1-NEXT: store i64 [[COND21]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK1-NEXT: store i64 [[TMP40]], ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: store i64 [[COND21]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[TMP40]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP41:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP42:%.*]] = load i32, ptr [[TMP41]], align 4
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP42]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -923,81 +1055,101 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I9:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I9:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[J10:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I9]] to ptr
+// CHECK1-NEXT: [[J10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J10]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0
// CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
// CHECK1-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: land.lhs.true:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_UB]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_LB]], align 8
-// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB]], align 8
-// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_UB_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_LB_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_UB_ASCAST]], align 8
+// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
-// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
-// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 1)
+// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CMP11:%.*]] = icmp ule i64 [[TMP13]], [[TMP14]]
// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP16]], 0
// CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1
// CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 1, [[DIV13]]
@@ -1006,16 +1158,16 @@ int bar(int n){
// CHECK1-NEXT: [[MUL17:%.*]] = mul nsw i64 [[DIV16]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL17]]
// CHECK1-NEXT: [[CONV18:%.*]] = trunc i64 [[ADD]] to i32
-// CHECK1-NEXT: store i32 [[CONV18]], ptr [[I9]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: store i32 [[CONV18]], ptr [[I9_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP19]], 0
// CHECK1-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1
// CHECK1-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]]
// CHECK1-NEXT: [[CONV22:%.*]] = sext i32 [[MUL21]] to i64
// CHECK1-NEXT: [[DIV23:%.*]] = sdiv i64 [[TMP18]], [[CONV22]]
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP20]], 0
// CHECK1-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1
// CHECK1-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]]
@@ -1025,14 +1177,14 @@ int bar(int n){
// CHECK1-NEXT: [[MUL30:%.*]] = mul nsw i64 [[SUB29]], 1
// CHECK1-NEXT: [[ADD31:%.*]] = add nsw i64 0, [[MUL30]]
// CHECK1-NEXT: [[CONV32:%.*]] = trunc i64 [[ADD31]] to i32
-// CHECK1-NEXT: store i32 [[CONV32]], ptr [[J10]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I9]], align 4
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4
+// CHECK1-NEXT: store i32 [[CONV32]], ptr [[J10_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[I9_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10_ASCAST]], align 4
// CHECK1-NEXT: [[ADD33:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I9]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[I9_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM34:%.*]] = sext i32 [[TMP24]] to i64
// CHECK1-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM34]]
// CHECK1-NEXT: store i32 [[ADD33]], ptr [[ARRAYIDX35]], align 4
@@ -1040,15 +1192,15 @@ int bar(int n){
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK1-NEXT: [[ADD36:%.*]] = add nsw i64 [[TMP25]], [[TMP26]]
-// CHECK1-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV]], align 8
+// CHECK1-NEXT: store i64 [[ADD36]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP28]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -1059,30 +1211,37 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR4]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -1092,131 +1251,148 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK1-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4
-// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[V_ADDR]], align 8
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to ptr
// CHECK1-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8
-// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to ptr
// CHECK1-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8
-// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to ptr
// CHECK1-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
-// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8
-// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
// CHECK1-NEXT: store ptr [[TMP20]], ptr [[TMP28]], align 8
-// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 5)
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]]
// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK1: cond.true10:
-// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END12:%.*]]
// CHECK1: cond.false11:
-// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END12]]
// CHECK1: cond.end12:
// CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE10]] ], [ [[TMP40]], [[COND_FALSE11]] ]
-// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -1227,77 +1403,94 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK1-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64
-// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]]
// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR]], align 8
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4]], align 4
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[IDXPROM]]
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
// CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP16]] to i64
// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM7]]
// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX8]], align 4
@@ -1305,15 +1498,15 @@ int bar(int n){
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -1324,27 +1517,33 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9:![0-9]+]], !align [[META10:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1354,126 +1553,142 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to ptr
// CHECK2-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to ptr
// CHECK2-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8
-// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to ptr
// CHECK2-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8
-// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP26]], align 8
-// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
-// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]]
// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK2: cond.true10:
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END12:%.*]]
// CHECK2: cond.false11:
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END12]]
// CHECK2: cond.end12:
// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ]
-// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -1484,70 +1699,86 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META10]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64
-// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]]
// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
@@ -1559,15 +1790,15 @@ int bar(int n){
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
-// CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -1578,29 +1809,35 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
-// CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
-// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK2-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1
+// CHECK2-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK2-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 1
+// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1610,90 +1847,102 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10
// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to ptr
// CHECK2-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 8
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to ptr
// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8
-// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 8
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 3)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 3)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9
// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK2: cond.true5:
// CHECK2-NEXT: br label [[COND_END7:%.*]]
// CHECK2: cond.false6:
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END7]]
// CHECK2: cond.end7:
// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ]
-// CHECK2-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP24]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -1705,52 +1954,64 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[CONV1]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
@@ -1760,10 +2021,10 @@ int bar(int n){
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -1775,27 +2036,33 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR4]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK2-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[F_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i64 [[TMP4]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1805,100 +2072,116 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK2-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100
// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[F_CASTED]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED]], align 8
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[F_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr
// CHECK2-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8
-// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK2-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr
// CHECK2-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8
-// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP17]], align 8
-// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP12]] to ptr
// CHECK2-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 8
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
-// CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99
// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]]
// CHECK2: cond.true6:
// CHECK2-NEXT: br label [[COND_END8:%.*]]
// CHECK2: cond.false7:
-// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END8]]
// CHECK2: cond.end8:
// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ]
-// CHECK2-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -1910,77 +2193,93 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i64 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[F]], ptr [[F_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
-// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32
-// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[TMP6]] to i64
-// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV3]], [[TMP7]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP10]], 10
// CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 10
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL5]]
// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]]
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[J]], align 4
-// CHECK2-NEXT: store i32 10, ptr [[K]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[J_ASCAST]], align 4
+// CHECK2-NEXT: store i32 10, ptr [[K_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[MUL8:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]]
// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP11]], [[MUL8]]
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[K_ASCAST]], align 4
// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD9]], [[TMP14]]
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[J_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP16]] to i64
// CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM11]]
// CHECK2-NEXT: store i32 [[ADD10]], ptr [[ARRAYIDX12]], align 4
@@ -1988,10 +2287,10 @@ int bar(int n){
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -2003,27 +2302,33 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR4]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -2033,141 +2338,161 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I8:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[J9:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I8:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[J9:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I8]] to ptr
+// CHECK2-NEXT: [[J9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J9]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0
// CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]]
// CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1
-// CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
// CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: land.lhs.true:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]]
// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1
// CHECK2-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP15]], [[ADD]]
// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP21]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to ptr
// CHECK2-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8
-// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to ptr
// CHECK2-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
-// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK2-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP22]] to ptr
// CHECK2-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 8
-// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP29]], align 8
-// CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], [[TMP33]]
-// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD12]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]]
-// CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD13]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP36]], [[TMP37]]
-// CHECK2-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK2-NEXT: store i32 [[ADD14]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
// CHECK2-NEXT: [[CMP15:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]]
// CHECK2-NEXT: br i1 [[CMP15]], label [[COND_TRUE16:%.*]], label [[COND_FALSE17:%.*]]
// CHECK2: cond.true16:
-// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END18:%.*]]
// CHECK2: cond.false17:
-// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END18]]
// CHECK2: cond.end18:
// CHECK2-NEXT: [[COND19:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE16]] ], [ [[TMP41]], [[COND_FALSE17]] ]
-// CHECK2-NEXT: store i32 [[COND19]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND19]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP42:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP42]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -2178,97 +2503,117 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I9:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[J10:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I9:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[J10:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I9]] to ptr
+// CHECK2-NEXT: [[J10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J10]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0
// CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]]
// CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1
-// CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB6]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
// CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: land.lhs.true:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP7]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP8]] to i32
-// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP9]] to i32
-// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[CONV8]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[CONV8]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[CONV11:%.*]] = sext i32 [[TMP13]] to i64
-// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CMP12:%.*]] = icmp ule i64 [[CONV11]], [[TMP14]]
// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP16]], 0
// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1
// CHECK2-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]]
// CHECK2-NEXT: [[DIV16:%.*]] = sdiv i32 [[TMP15]], [[MUL15]]
// CHECK2-NEXT: [[MUL17:%.*]] = mul nsw i32 [[DIV16]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL17]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I9]], align 4
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I9_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP19]], 0
// CHECK2-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1
// CHECK2-NEXT: [[MUL20:%.*]] = mul nsw i32 1, [[DIV19]]
// CHECK2-NEXT: [[DIV21:%.*]] = sdiv i32 [[TMP18]], [[MUL20]]
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP20]], 0
// CHECK2-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
// CHECK2-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
@@ -2276,14 +2621,14 @@ int bar(int n){
// CHECK2-NEXT: [[SUB26:%.*]] = sub nsw i32 [[TMP17]], [[MUL25]]
// CHECK2-NEXT: [[MUL27:%.*]] = mul nsw i32 [[SUB26]], 1
// CHECK2-NEXT: [[ADD28:%.*]] = add nsw i32 0, [[MUL27]]
-// CHECK2-NEXT: store i32 [[ADD28]], ptr [[J10]], align 4
-// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I9]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10]], align 4
+// CHECK2-NEXT: store i32 [[ADD28]], ptr [[J10_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = load i32, ptr [[I9_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP22:%.*]] = load i32, ptr [[J10_ASCAST]], align 4
// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
-// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I9]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load i32, ptr [[I9_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
-// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10]], align 4
+// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[J10_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM30:%.*]] = sext i32 [[TMP24]] to i64
// CHECK2-NEXT: [[ARRAYIDX31:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i64 0, i64 [[IDXPROM30]]
// CHECK2-NEXT: store i32 [[ADD29]], ptr [[ARRAYIDX31]], align 4
@@ -2291,15 +2636,15 @@ int bar(int n){
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD32:%.*]] = add nsw i32 [[TMP25]], [[TMP26]]
-// CHECK2-NEXT: store i32 [[ADD32]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD32]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP28]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -2310,30 +2655,37 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR4]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -2343,131 +2695,148 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK2-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4
-// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8
-// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[V_ADDR]], align 8
-// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to ptr
// CHECK2-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 8
-// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
+// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to ptr
// CHECK2-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8
-// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
+// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to ptr
// CHECK2-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 8
-// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
+// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
// CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP27]], align 8
-// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
+// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 4
// CHECK2-NEXT: store ptr [[TMP20]], ptr [[TMP28]], align 8
-// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP29:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP29]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 5)
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 5)
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]]
-// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]]
// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK2: cond.true10:
-// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END12:%.*]]
// CHECK2: cond.false11:
-// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END12]]
// CHECK2: cond.end12:
// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE10]] ], [ [[TMP40]], [[COND_FALSE11]] ]
-// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP41]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP42:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP43:%.*]] = load i32, ptr [[TMP42]], align 4
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP43]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -2478,77 +2847,94 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 8
-// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 8
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK2-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META9]], !align [[META11]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK2-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP5]] to i32
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP6]] to i32
-// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64
-// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR]], align 8
+// CHECK2-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP11]]
// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR]], align 8
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4]], align 4
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 8
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[IDXPROM]]
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[I4_ASCAST]], align 4
// CHECK2-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP16]] to i64
// CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM7]]
// CHECK2-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX8]], align 4
@@ -2556,15 +2942,15 @@ int bar(int n){
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
@@ -2575,27 +2961,33 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK3-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META9:![0-9]+]], !align [[META10:![0-9]+]]
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -2605,124 +2997,140 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK3-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META9]], !align [[META10]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to ptr
// CHECK3-NEXT: store ptr [[TMP19]], ptr [[TMP18]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to ptr
// CHECK3-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to ptr
// CHECK3-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 4
-// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
+// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 3
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP24]], align 4
-// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP25:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP26]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 4)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], [[TMP28]]
-// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
-// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP33]], [[TMP34]]
// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK3: cond.true10:
-// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END12:%.*]]
// CHECK3: cond.false11:
-// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END12]]
// CHECK3: cond.end12:
// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP35]], [[COND_TRUE10]] ], [ [[TMP36]], [[COND_FALSE11]] ]
-// CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP37]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP38:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[TMP38]], align 4
// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP39]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
@@ -2733,67 +3141,83 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28_omp_outlined_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[AA_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[AA_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[AA]], ptr [[AA_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AA_ADDR_ASCAST]], align 4, !nonnull [[META9]], !align [[META10]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]]
// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I3]], align 4
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[I3_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], ptr [[TMP0]], i32 0, i32 [[TMP13]]
// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32
@@ -2804,15 +3228,15 @@ int bar(int n){
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]]
-// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4
// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP18]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
@@ -2823,29 +3247,35 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META9]], !align [[META11:![0-9]+]]
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP3]] to i1
-// CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK3-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
+// CHECK3-NEXT: [[TMP3:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK3-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1
+// CHECK3-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK3-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 1
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -2855,88 +3285,100 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META9]], !align [[META11]]
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 9, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9
// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10
// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to ptr
// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP13]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 3)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 3)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
-// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP20]], 9
// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]]
// CHECK3: cond.true5:
// CHECK3-NEXT: br label [[COND_END7:%.*]]
// CHECK3: cond.false6:
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END7]]
// CHECK3: cond.end7:
// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP21]], [[COND_FALSE6]] ]
-// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP22]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -2948,49 +3390,61 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l33_omp_outlined_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK3-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META9]], !align [[META11]]
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 [[TMP9]]
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1
@@ -2999,10 +3453,10 @@ int bar(int n){
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]]
-// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[ADD2]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -3014,27 +3468,33 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR4]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK3-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK3-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META9]], !align [[META11]]
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP3]], ptr [[F_CASTED]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP3]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[F_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[TMP0]], i32 [[TMP4]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -3044,98 +3504,114 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK3-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK3-NEXT: [[F_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_CASTED]] to ptr
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META9]], !align [[META11]]
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100
// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[F_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP9]], ptr [[F_CASTED]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[F_CASTED]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP9]], ptr [[F_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[F_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to ptr
// CHECK3-NEXT: store ptr [[TMP12]], ptr [[TMP11]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to ptr
// CHECK3-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP15]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 3
// CHECK3-NEXT: [[TMP17:%.*]] = inttoptr i32 [[TMP10]] to ptr
// CHECK3-NEXT: store ptr [[TMP17]], ptr [[TMP16]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 4)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
-// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD3]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
-// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP24]], 99
// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]]
// CHECK3: cond.true6:
// CHECK3-NEXT: br label [[COND_END8:%.*]]
// CHECK3: cond.false7:
-// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END8]]
// CHECK3: cond.end8:
// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP25]], [[COND_FALSE7]] ]
-// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND9]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP26]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -3147,83 +3623,99 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38_omp_outlined_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[F:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK3-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK3-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK3-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[K_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[K]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[F]], ptr [[F_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META9]], !align [[META11]]
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10
// CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]]
// CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1
// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]]
-// CHECK3-NEXT: store i32 [[ADD5]], ptr [[J]], align 4
-// CHECK3-NEXT: store i32 10, ptr [[K]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[ADD5]], ptr [[J_ASCAST]], align 4
+// CHECK3-NEXT: store i32 10, ptr [[K_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[J_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[F_ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]]
// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]]
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[K]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[K_ASCAST]], align 4
// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]]
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[I_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP15]]
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[J]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[J_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP16]]
// CHECK3-NEXT: store i32 [[ADD8]], ptr [[ARRAYIDX9]], align 4
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[ADD10]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
@@ -3235,27 +3727,33 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR4]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK3-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META9]], !align [[META11]]
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -3265,144 +3763,164 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I9:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[J10:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I9:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[J10:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I9]] to ptr
+// CHECK3-NEXT: [[J10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J10]] to ptr
+// CHECK3-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META9]], !align [[META11]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0
// CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK3-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK3-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: land.lhs.true:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK3-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: store i64 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
// CHECK3-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64
-// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK3-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]])
-// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK3-NEXT: call void @__kmpc_distribute_static_init_8(ptr @[[GLOB2]], i32 [[TMP9]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 [[CONV11]])
+// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK3-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]]
// CHECK3-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
-// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK3-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV]], align 8
+// CHECK3-NEXT: store i64 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[TMP14]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1
// CHECK3-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]]
// CHECK3-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
+// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
// CHECK3-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32
-// CHECK3-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// CHECK3-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
// CHECK3-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP21]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP18]] to ptr
// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4
-// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP20]] to ptr
// CHECK3-NEXT: store ptr [[TMP26]], ptr [[TMP25]], align 4
-// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK3-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP22]] to ptr
// CHECK3-NEXT: store ptr [[TMP28]], ptr [[TMP27]], align 4
-// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
+// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 3
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP29]], align 4
-// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP30]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 4)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 4)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK3-NEXT: [[TMP32:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP33:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP32]], [[TMP33]]
-// CHECK3-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK3-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK3-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP34:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP35:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP34]], [[TMP35]]
-// CHECK3-NEXT: store i64 [[ADD15]], ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK3-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK3-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK3-NEXT: store i64 [[ADD15]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP36:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP36]], [[TMP37]]
-// CHECK3-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK3-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK3-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK3-NEXT: store i64 [[ADD16]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP39:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK3-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP38]], [[TMP39]]
// CHECK3-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]]
// CHECK3: cond.true18:
-// CHECK3-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// CHECK3-NEXT: [[TMP40:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
// CHECK3-NEXT: br label [[COND_END20:%.*]]
// CHECK3: cond.false19:
-// CHECK3-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_COMB_UB]], align 8
+// CHECK3-NEXT: [[TMP41:%.*]] = load i64, ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
// CHECK3-NEXT: br label [[COND_END20]]
// CHECK3: cond.end20:
// CHECK3-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP40]], [[COND_TRUE18]] ], [ [[TMP41]], [[COND_FALSE19]] ]
-// CHECK3-NEXT: store i64 [[COND21]], ptr [[DOTOMP_COMB_UB]], align 8
-// CHECK3-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_COMB_LB]], align 8
-// CHECK3-NEXT: store i64 [[TMP42]], ptr [[DOTOMP_IV]], align 8
+// CHECK3-NEXT: store i64 [[COND21]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP42:%.*]] = load i64, ptr [[DOTOMP_COMB_LB_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[TMP42]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP43:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4
// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP44]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
@@ -3413,84 +3931,104 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_omp_outlined_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I11:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[J12:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I11:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[J12:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_3]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[J_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J]] to ptr
+// CHECK3-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I11]] to ptr
+// CHECK3-NEXT: [[J12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J12]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META9]], !align [[META11]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0
// CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
// CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
-// CHECK3-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[J]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK3-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[J_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: land.lhs.true:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]]
// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]]
// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
-// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
-// CHECK3-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_UB]], align 8
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
+// CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[TMP7]], ptr [[DOTOMP_UB_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[CONV9:%.*]] = zext i32 [[TMP8]] to i64
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[CONV10:%.*]] = zext i32 [[TMP9]] to i64
-// CHECK3-NEXT: store i64 [[CONV9]], ptr [[DOTOMP_LB]], align 8
-// CHECK3-NEXT: store i64 [[CONV10]], ptr [[DOTOMP_UB]], align 8
-// CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: store i64 [[CONV9]], ptr [[DOTOMP_LB_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[CONV10]], ptr [[DOTOMP_UB_ASCAST]], align 8
+// CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
-// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
-// CHECK3-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV]], align 8
+// CHECK3-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB3]], i32 [[TMP11]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i64 1, i64 1)
+// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_LB_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[CONV13:%.*]] = zext i32 [[TMP14]] to i64
// CHECK3-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP13]], [[CONV13]]
// CHECK3-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK3-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP16]], 0
// CHECK3-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
// CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
@@ -3499,16 +4037,16 @@ int bar(int n){
// CHECK3-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]]
// CHECK3-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32
-// CHECK3-NEXT: store i32 [[CONV21]], ptr [[I11]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: store i32 [[CONV21]], ptr [[I11_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP18:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK3-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP19]], 0
// CHECK3-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1
// CHECK3-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]]
// CHECK3-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64
// CHECK3-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP18]], [[CONV25]]
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK3-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP20]], 0
// CHECK3-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1
// CHECK3-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]]
@@ -3518,28 +4056,28 @@ int bar(int n){
// CHECK3-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1
// CHECK3-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]]
// CHECK3-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32
-// CHECK3-NEXT: store i32 [[CONV35]], ptr [[J12]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I11]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[J12]], align 4
+// CHECK3-NEXT: store i32 [[CONV35]], ptr [[J12_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = load i32, ptr [[I11_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = load i32, ptr [[J12_ASCAST]], align 4
// CHECK3-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP21]], [[TMP22]]
-// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I11]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[I11_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP0]], i32 0, i32 [[TMP23]]
-// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[J12]], align 4
+// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[J12_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX]], i32 0, i32 [[TMP24]]
// CHECK3-NEXT: store i32 [[ADD36]], ptr [[ARRAYIDX37]], align 4
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8
-// CHECK3-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[DOTOMP_IV_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP26:%.*]] = load i64, ptr [[DOTOMP_STRIDE_ASCAST]], align 8
// CHECK3-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP25]], [[TMP26]]
-// CHECK3-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV]], align 8
+// CHECK3-NEXT: store i64 [[ADD38]], ptr [[DOTOMP_IV_ASCAST]], align 8
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP28]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
@@ -3550,30 +4088,37 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR4]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK3-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK3-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META9]], !align [[META11]]
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP3]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]], ptr [[TMP5]]) #[[ATTR2]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -3583,129 +4128,146 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK3-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK3-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META9]], !align [[META11]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
-// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]])
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2]], i32 [[TMP6]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]]
// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[V_ADDR]], align 4
-// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP16]], ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP14]] to ptr
// CHECK3-NEXT: store ptr [[TMP20]], ptr [[TMP19]], align 4
-// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP15]] to ptr
// CHECK3-NEXT: store ptr [[TMP22]], ptr [[TMP21]], align 4
-// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 2
// CHECK3-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP17]] to ptr
// CHECK3-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4
-// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 3
+// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 3
// CHECK3-NEXT: store ptr [[TMP0]], ptr [[TMP25]], align 4
-// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 4
+// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 4
// CHECK3-NEXT: store ptr [[TMP18]], ptr [[TMP26]], align 4
-// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 5)
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 5)
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]]
-// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]]
-// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]]
-// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]]
// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]]
// CHECK3: cond.true10:
-// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK3-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END12:%.*]]
// CHECK3: cond.false11:
-// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK3-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK3-NEXT: br label [[COND_END12]]
// CHECK3: cond.end12:
// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ]
-// CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4
// CHECK3-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP41]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
@@ -3716,88 +4278,105 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l53_omp_outlined_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[DOTPREVIOUS_LB_:%.*]], i32 noundef [[DOTPREVIOUS_UB_:%.*]], i32 noundef [[N:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[A:%.*]], ptr noundef [[V:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK3-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr
+// CHECK3-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK3-NEXT: [[V_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK3-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr
+// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK3-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK3-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK3-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK3-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK3-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I3]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[N]], ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[V]], ptr [[V_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META9]], !align [[META11]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// CHECK3-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK3: omp.precond.then:
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4
-// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
-// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3]], i32 [[TMP8]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP9]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK3: omp.inner.for.cond:
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]]
// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK3: omp.inner.for.body:
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1
// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3]], align 4
-// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR]], align 4
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I3]], align 4
+// CHECK3-NEXT: store i32 [[ADD]], ptr [[I3_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[V_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[I3_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 [[TMP14]]
// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[I3_ASCAST]], align 4
// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i32 0, i32 [[TMP16]]
// CHECK3-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX5]], align 4
// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK3: omp.body.continue:
// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK3: omp.inner.for.inc:
-// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]]
-// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4
+// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK3: omp.inner.for.end:
// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp.loop.exit:
-// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP20]])
// CHECK3-NEXT: br label [[OMP_PRECOND_END]]
diff --git a/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
index f0effa760dcdb..bb7febd971bce 100644
--- a/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_generic_loop_generic_mode_codegen.cpp
@@ -32,35 +32,43 @@ int main(int argc, char **argv) {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK1-NEXT: [[ARGC_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_CASTED]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[ARGC_CASTED_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 1
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -70,91 +78,106 @@ int main(int argc, char **argv) {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK1-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK1: omp.precond.then:
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK1-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR4:[0-9]+]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4_ASCAST]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR4]]
// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]]
-// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR4]]
+// CHECK1-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR_ASCAST]]) #[[ATTR4]]
// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]]
// CHECK1-NEXT: store i32 [[ADD10]], ptr [[TMP0]], align 4
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
+// CHECK1-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK1: omp.inner.for.end:
// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK1: omp.loop.exit:
-// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
// CHECK1-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
// CHECK1-NEXT: br label [[OMP_PRECOND_END]]
@@ -165,35 +188,43 @@ int main(int argc, char **argv) {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK2-NEXT: [[ARGC_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_CASTED]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__CASTED]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1
-// CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8
-// CHECK2-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[ARGC_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARGC_CASTED_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK2-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP5]] to i1
+// CHECK2-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8
+// CHECK2-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 1
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i32 [[TMP4]], ptr [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -203,91 +234,106 @@ int main(int argc, char **argv) {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l24_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i32 noundef [[ARGC:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK2-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr
+// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_2]] to ptr
+// CHECK2-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK2-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK2-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK2-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META5]], !align [[META6]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[I]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// CHECK2-NEXT: store i32 [[SUB3]], ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
-// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_COMB_LB]], ptr [[DOTOMP_COMB_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP6]], i32 92, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]]
// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ]
-// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB]], align 4
-// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
-// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB]], align 4
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]]
// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4]], align 4
-// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4]]) #[[ATTR4:[0-9]+]]
+// CHECK2-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4
+// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[I4_ASCAST]]) #[[ATTR4:[0-9]+]]
// CHECK2-NEXT: [[CALL7:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[TMP0]]) #[[ATTR4]]
// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]]
-// CHECK2-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR]]) #[[ATTR4]]
+// CHECK2-NEXT: [[CALL9:%.*]] = call noundef i32 @_Z3fooPi(ptr noundef [[ARGC_ADDR_ASCAST]]) #[[ATTR4]]
// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]]
// CHECK2-NEXT: store i32 [[ADD10]], ptr [[TMP0]], align 4
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
-// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], 1
-// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV]], align 4
+// CHECK2-NEXT: store i32 [[ADD11]], ptr [[DOTOMP_IV_ASCAST]], align 4
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
// CHECK2: omp.loop.exit:
-// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
// CHECK2-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP17]])
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
diff --git a/clang/test/OpenMP/nvptx_target_teams_ompx_bare_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_ompx_bare_codegen.cpp
index 2e6f0a9ce0169..e43545bc1324a 100644
--- a/clang/test/OpenMP/nvptx_target_teams_ompx_bare_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_teams_ompx_bare_codegen.cpp
@@ -30,29 +30,36 @@ int bar(int n){
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l13
// CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
-// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1
-// CHECK-NEXT: store i8 [[TMP0]], ptr [[A_CASTED]], align 1
-// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED]], align 8
-// CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l13_omp_outlined(ptr null, ptr [[DOTZERO_ADDR]], i64 [[TMP1]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A_ADDR_ASCAST]], align 1
+// CHECK-NEXT: store i8 [[TMP0]], ptr [[A_CASTED_ASCAST]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A_CASTED_ASCAST]], align 8
+// CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l13_omp_outlined(ptr null, ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP1]]) #[[ATTR2:[0-9]+]]
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l13_omp_outlined
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[A:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK-NEXT: store i8 2, ptr [[A_ADDR]], align 1
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i8 2, ptr [[A_ADDR_ASCAST]], align 1
// CHECK-NEXT: ret void
//
diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp
index c71f5da40a147..268aca40b847b 100644
--- a/clang/test/OpenMP/nvptx_teams_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp
@@ -79,23 +79,27 @@ int main (int argc, char **argv) {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
// CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
@@ -106,13 +110,16 @@ int main (int argc, char **argv) {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR_ASCAST]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
// CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4
// CHECK1-NEXT: ret void
//
@@ -120,23 +127,27 @@ int main (int argc, char **argv) {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[ARGC:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
// CHECK1-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
@@ -147,13 +158,16 @@ int main (int argc, char **argv) {
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META8:![0-9]+]]
// CHECK1-NEXT: store ptr null, ptr [[TMP0]], align 8
// CHECK1-NEXT: ret void
//
@@ -161,23 +175,27 @@ int main (int argc, char **argv) {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
@@ -188,13 +206,16 @@ int main (int argc, char **argv) {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR_ASCAST]], align 4, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
// CHECK2-NEXT: store i32 0, ptr [[TMP0]], align 4
// CHECK2-NEXT: ret void
//
@@ -202,23 +223,27 @@ int main (int argc, char **argv) {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[ARGC:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK2-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
@@ -229,13 +254,16 @@ int main (int argc, char **argv) {
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META7]]
// CHECK2-NEXT: store ptr null, ptr [[TMP0]], align 4
// CHECK2-NEXT: ret void
//
@@ -243,27 +271,33 @@ int main (int argc, char **argv) {
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], i64 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
-// CHECK3-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR]], align 8
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK3-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK3-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 4)
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
@@ -274,13 +308,16 @@ int main (int argc, char **argv) {
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR_ASCAST]], align 8, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
// CHECK3-NEXT: store i32 0, ptr [[TMP0]], align 4
// CHECK3-NEXT: ret void
//
@@ -288,27 +325,33 @@ int main (int argc, char **argv) {
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]], ptr noundef [[ARGC:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
-// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
-// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK3-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK3-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK3-NEXT: store i64 [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
+// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR_ASCAST]], align 8
// CHECK3-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
// CHECK3-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 8
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i64 8)
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
@@ -319,13 +362,16 @@ int main (int argc, char **argv) {
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR2]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK3-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 8
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR_ASCAST]], align 8, !nonnull [[META6]], !align [[META8:![0-9]+]]
// CHECK3-NEXT: store ptr null, ptr [[TMP0]], align 8
// CHECK3-NEXT: ret void
//
@@ -333,27 +379,33 @@ int main (int argc, char **argv) {
// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64
// CHECK4-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[ARGC:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK4-NEXT: entry:
-// CHECK4-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
-// CHECK4-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4
+// CHECK4-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK4-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK4-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK4-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK4-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK4-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK4-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_kernel_environment, ptr [[DYN_PTR]])
// CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
-// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4
+// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARGC_ADDR_ASCAST]], align 4
// CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK4-NEXT: store i32 [[TMP1]], ptr [[ARGC1]], align 4
// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
+// CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[ARGC1]]) #[[ATTR3:[0-9]+]]
// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
// CHECK4-NEXT: call void @__kmpc_target_deinit()
// CHECK4-NEXT: ret void
@@ -364,13 +416,16 @@ int main (int argc, char **argv) {
// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_omp_outlined
// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK4-NEXT: entry:
-// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4
-// CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4
+// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK4-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK4-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR_ASCAST]], align 4, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]]
// CHECK4-NEXT: store i32 0, ptr [[TMP0]], align 4
// CHECK4-NEXT: ret void
//
@@ -378,27 +433,33 @@ int main (int argc, char **argv) {
// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53
// CHECK4-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], ptr noundef [[ARGC:%.*]]) #[[ATTR0]] {
// CHECK4-NEXT: entry:
-// CHECK4-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4
-// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK4-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
-// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
+// CHECK4-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK4-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK4-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK4-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK4-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK4-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK4-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK4-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
// CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_kernel_environment, ptr [[DYN_PTR]])
// CHECK4-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK4: user_code.entry:
-// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4
+// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGC_ADDR_ASCAST]], align 4
// CHECK4-NEXT: [[ARGC1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK4-NEXT: store ptr [[TMP1]], ptr [[ARGC1]], align 4
// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[ARGC1]]) #[[ATTR3]]
+// CHECK4-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK4-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[ARGC1]]) #[[ATTR3]]
// CHECK4-NEXT: call void @__kmpc_free_shared(ptr [[ARGC1]], i32 4)
// CHECK4-NEXT: call void @__kmpc_target_deinit()
// CHECK4-NEXT: ret void
@@ -409,13 +470,16 @@ int main (int argc, char **argv) {
// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_omp_outlined
// CHECK4-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR2]] {
// CHECK4-NEXT: entry:
-// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4
-// CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 4
-// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 4
+// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK4-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK4-NEXT: [[ARGC_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARGC_ADDR]] to ptr
+// CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR_ASCAST]], align 4
+// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR_ASCAST]], align 4, !nonnull [[META6]], !align [[META7]]
// CHECK4-NEXT: store ptr null, ptr [[TMP0]], align 4
// CHECK4-NEXT: ret void
//
diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
index 350b6f761e573..138ef4c94ec7b 100644
--- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp
@@ -53,23 +53,27 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[E]], ptr [[E_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[E]], ptr [[E_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[E_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[E_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
// CHECK1-NEXT: store double [[TMP1]], ptr [[E1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i64 8)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
@@ -80,23 +84,27 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK1-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
// CHECK1-NEXT: store double 0.000000e+00, ptr [[E1]], align 8
// CHECK1-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8
// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00
// CHECK1-NEXT: store double [[ADD]], ptr [[E1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[E1]], ptr [[TMP2]], align 8
// CHECK1-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer()
-// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func)
+// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func)
// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 1
// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK1: .omp.reduction.then:
@@ -113,32 +121,38 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i64 1
// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK1-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK1-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
+// CHECK1-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 8
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i64 1
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i64 1
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK1-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK1-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -153,7 +167,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
// CHECK1-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
-// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
@@ -163,7 +177,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
// CHECK1-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
// CHECK1: then4:
-// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8
// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK1-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8
@@ -179,57 +193,60 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
+// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK1-NEXT: br label [[PRECOND:%.*]]
// CHECK1: precond:
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2
-// CHECK1-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 2
+// CHECK1-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]]
// CHECK1: body:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
-// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]]
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK1-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP6]]
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
+// CHECK1-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
// CHECK1: ifcont:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]]
-// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]]
+// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK1: then3:
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4
-// CHECK1-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
-// CHECK1-NEXT: br label [[IFCONT4:%.*]]
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP6]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
+// CHECK1-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4
+// CHECK1-NEXT: br label [[IFCONT5:%.*]]
// CHECK1: else4:
-// CHECK1-NEXT: br label [[IFCONT4]]
+// CHECK1-NEXT: br label [[IFCONT5]]
// CHECK1: ifcont5:
-// CHECK1-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK1-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4
+// CHECK1-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK1-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK1-NEXT: br label [[PRECOND]]
// CHECK1: exit:
// CHECK1-NEXT: ret void
@@ -238,112 +255,131 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8
-// CHECK1-NEXT: store double [[TMP9]], ptr [[E]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP7]], align 8
+// CHECK1-NEXT: store double [[TMP10]], ptr [[TMP9]], align 8
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[E]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP7]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP8]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP9:%.*]] = load double, ptr [[E]], align 8
-// CHECK1-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP9]], align 8
+// CHECK1-NEXT: store double [[TMP10]], ptr [[TMP7]], align 8
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[E]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP8]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[C:%.*]], i64 noundef [[D:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[D]], ptr [[D_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR]], align 1
+// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR_ASCAST]], align 1
// CHECK1-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1)
// CHECK1-NEXT: store i8 [[TMP1]], ptr [[C1]], align 1
-// CHECK1-NEXT: [[TMP2:%.*]] = load float, ptr [[D_ADDR]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = load float, ptr [[D_ADDR_ASCAST]], align 4
// CHECK1-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: store float [[TMP2]], ptr [[D2]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i64 4)
// CHECK1-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i64 1)
// CHECK1-NEXT: call void @__kmpc_target_deinit()
@@ -355,17 +391,22 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !nonnull [[META7]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9:![0-9]+]]
// CHECK1-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 1)
// CHECK1-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 4)
// CHECK1-NEXT: store i8 0, ptr [[C1]], align 1
@@ -378,12 +419,12 @@ int bar(int n){
// CHECK1-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01
// CHECK1-NEXT: store float [[MUL]], ptr [[D2]], align 4
-// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[C1]], ptr [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: store ptr [[D2]], ptr [[TMP5]], align 8
// CHECK1-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer()
-// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2, ptr @_omp_reduction_list_to_global_copy_func3, ptr @_omp_reduction_list_to_global_reduce_func4, ptr @_omp_reduction_global_to_list_copy_func5, ptr @_omp_reduction_global_to_list_reduce_func6)
+// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2, ptr @_omp_reduction_list_to_global_copy_func3, ptr @_omp_reduction_list_to_global_reduce_func4, ptr @_omp_reduction_global_to_list_copy_func5, ptr @_omp_reduction_global_to_list_reduce_func6)
// CHECK1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
// CHECK1-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK1: .omp.reduction.then:
@@ -408,24 +449,31 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func1
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT4]] to ptr
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i64 1
// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1
// CHECK1-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32
@@ -433,22 +481,22 @@ int bar(int n){
// CHECK1-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16
// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]])
// CHECK1-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
-// CHECK1-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1
+// CHECK1-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 1
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i64 1
-// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i64 1
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8
// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
// CHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i64 1
// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4
// CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK1-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16
// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]])
-// CHECK1-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4
+// CHECK1-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], align 4
// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i64 1
-// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], i64 1
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], ptr [[TMP22]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK1-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK1-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -463,7 +511,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
// CHECK1-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
-// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
@@ -473,13 +521,13 @@ int bar(int n){
// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
// CHECK1-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK1: then5:
-// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 8
// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 8
// CHECK1-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1
// CHECK1-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1
-// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8
// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 8
@@ -495,73 +543,75 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func2
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
-// CHECK1-NEXT: store volatile i8 [[TMP10]], ptr addrspace(3) [[TMP9]], align 1
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
+// CHECK1-NEXT: store volatile i8 [[TMP9]], ptr addrspace(3) [[TMP8]], align 1
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
// CHECK1: ifcont:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
-// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]]
+// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK1: then3:
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i8, ptr addrspace(3) [[TMP12]], align 1
-// CHECK1-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1
-// CHECK1-NEXT: br label [[IFCONT4:%.*]]
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load volatile i8, ptr addrspace(3) [[TMP11]], align 1
+// CHECK1-NEXT: store i8 [[TMP14]], ptr [[TMP13]], align 1
+// CHECK1-NEXT: br label [[IFCONT5:%.*]]
// CHECK1: else4:
-// CHECK1-NEXT: br label [[IFCONT4]]
-// CHECK1: ifcont5
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK1-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
+// CHECK1-NEXT: br label [[IFCONT5]]
+// CHECK1: ifcont5:
+// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK1-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK1-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]]
// CHECK1: then8:
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK1-NEXT: store volatile i32 [[TMP19]], ptr addrspace(3) [[TMP18]], align 4
-// CHECK1-NEXT: br label [[IFCONT8:%.*]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4
+// CHECK1-NEXT: store volatile i32 [[TMP18]], ptr addrspace(3) [[TMP17]], align 4
+// CHECK1-NEXT: br label [[IFCONT10:%.*]]
// CHECK1: else9:
-// CHECK1-NEXT: br label [[IFCONT8]]
+// CHECK1-NEXT: br label [[IFCONT10]]
// CHECK1: ifcont10:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
-// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
+// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]]
+// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]]
// CHECK1: then13:
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = load volatile i32, ptr addrspace(3) [[TMP21]], align 4
-// CHECK1-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4
-// CHECK1-NEXT: br label [[IFCONT12:%.*]]
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = load volatile i32, ptr addrspace(3) [[TMP20]], align 4
+// CHECK1-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4
+// CHECK1-NEXT: br label [[IFCONT15:%.*]]
// CHECK1: else14:
-// CHECK1-NEXT: br label [[IFCONT12]]
+// CHECK1-NEXT: br label [[IFCONT15]]
// CHECK1: ifcont15:
// CHECK1-NEXT: ret void
//
@@ -569,126 +619,145 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func3
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-// CHECK1-NEXT: store i8 [[TMP9]], ptr [[C]], align 1
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP12]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4
-// CHECK1-NEXT: store float [[TMP13]], ptr [[D]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP8]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP7]], align 1
+// CHECK1-NEXT: store i8 [[TMP10]], ptr [[TMP9]], align 1
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP13]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP12]], align 4
+// CHECK1-NEXT: store float [[TMP15]], ptr [[TMP14]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func4
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP6]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[C]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[D]], ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]]
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP6]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP9]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP11]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func5
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 1
-// CHECK1-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP12]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP13:%.*]] = load float, ptr [[D]], align 4
-// CHECK1-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP8]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
+// CHECK1-NEXT: store i8 [[TMP10]], ptr [[TMP7]], align 1
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP13]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP14]], align 4
+// CHECK1-NEXT: store float [[TMP15]], ptr [[TMP12]], align 4
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func6
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP6]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[C]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[D]], ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP6]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP9]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP11]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33
// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK1: user_code.entry:
// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]]
+// CHECK1-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[A_ADDR_ASCAST]], ptr [[B_ADDR_ASCAST]]) #[[ATTR4]]
// CHECK1-NEXT: call void @__kmpc_target_deinit()
// CHECK1-NEXT: ret void
// CHECK1: worker.exit:
@@ -698,45 +767,53 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B2:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[A1]], align 4
-// CHECK1-NEXT: store i16 -32768, ptr [[B2]], align 2
-// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP2]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[A1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A1]] to ptr
+// CHECK1-NEXT: [[B2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B2]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META10:![0-9]+]]
+// CHECK1-NEXT: store i32 0, ptr [[A1_ASCAST]], align 4
+// CHECK1-NEXT: store i16 -32768, ptr [[B2_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[A1_ASCAST]], ptr [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[B2_ASCAST]], ptr [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 2)
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP7]], align 8
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 2)
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[A1_ASCAST]], ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[B2_ASCAST]], ptr [[TMP7]], align 8
// CHECK1-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer()
-// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func9, ptr @_omp_reduction_inter_warp_copy_func10, ptr @_omp_reduction_list_to_global_copy_func11, ptr @_omp_reduction_list_to_global_reduce_func12, ptr @_omp_reduction_global_to_list_copy_func13, ptr @_omp_reduction_global_to_list_reduce_func14)
+// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func9, ptr @_omp_reduction_inter_warp_copy_func10, ptr @_omp_reduction_list_to_global_copy_func11, ptr @_omp_reduction_list_to_global_reduce_func12, ptr @_omp_reduction_global_to_list_copy_func13, ptr @_omp_reduction_global_to_list_reduce_func14)
// CHECK1-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1
// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK1: .omp.reduction.then:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP10]], [[TMP11]]
// CHECK1-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4
// CHECK1-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP12]] to i32
-// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP13]] to i32
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]]
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -744,7 +821,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK1-NEXT: [[TMP15:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i16 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
@@ -757,53 +834,60 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined
// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR2]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B2:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[A1]], align 4
-// CHECK1-NEXT: store i16 -32768, ptr [[B2]], align 2
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[A1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A1]] to ptr
+// CHECK1-NEXT: [[B2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B2]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META9]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !nonnull [[META7]], !align [[META10]]
+// CHECK1-NEXT: store i32 0, ptr [[A1_ASCAST]], align 4
+// CHECK1-NEXT: store i16 -32768, ptr [[B2_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1
-// CHECK1-NEXT: store i32 [[OR]], ptr [[A1]], align 4
-// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK1-NEXT: store i32 [[OR]], ptr [[A1_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]]
// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK1-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ]
// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16
-// CHECK1-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: store ptr [[A1]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK1-NEXT: store ptr [[B2]], ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func7, ptr @_omp_reduction_inter_warp_copy_func8)
+// CHECK1-NEXT: store i16 [[CONV4]], ptr [[B2_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: store ptr [[A1_ASCAST]], ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 1
+// CHECK1-NEXT: store ptr [[B2_ASCAST]], ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func7, ptr @_omp_reduction_inter_warp_copy_func8)
// CHECK1-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1
// CHECK1-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK1: .omp.reduction.then:
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
// CHECK1-NEXT: [[OR5:%.*]] = or i32 [[TMP9]], [[TMP10]]
// CHECK1-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4
// CHECK1-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP11]] to i32
-// CHECK1-NEXT: [[TMP12:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK1-NEXT: [[TMP12:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP12]] to i32
// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]]
// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]]
@@ -811,7 +895,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK1-NEXT: br label [[COND_END11:%.*]]
// CHECK1: cond.false10:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK1-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK1-NEXT: br label [[COND_END11]]
// CHECK1: cond.end11:
// CHECK1-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP13]], [[COND_TRUE9]] ], [ [[TMP14]], [[COND_FALSE10]] ]
@@ -824,36 +908,43 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func7
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT4]] to ptr
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK1-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
+// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 4
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i64 1
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1
// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2
// CHECK1-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32
@@ -861,10 +952,10 @@ int bar(int n){
// CHECK1-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]])
// CHECK1-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
-// CHECK1-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
+// CHECK1-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], align 2
// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1
-// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], i64 1
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], ptr [[TMP20]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK1-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK1-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -879,7 +970,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
// CHECK1-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
-// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
@@ -889,13 +980,13 @@ int bar(int n){
// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
// CHECK1-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK1: then5:
-// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 8
// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 8
// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4
// CHECK1-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4
-// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8
// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 8
@@ -911,73 +1002,75 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func8
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK1-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: store volatile i32 [[TMP9]], ptr addrspace(3) [[TMP8]], align 4
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
// CHECK1: ifcont:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
-// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]]
+// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK1: then3:
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4
-// CHECK1-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
-// CHECK1-NEXT: br label [[IFCONT4:%.*]]
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load volatile i32, ptr addrspace(3) [[TMP11]], align 4
+// CHECK1-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4
+// CHECK1-NEXT: br label [[IFCONT5:%.*]]
// CHECK1: else4:
-// CHECK1-NEXT: br label [[IFCONT4]]
+// CHECK1-NEXT: br label [[IFCONT5]]
// CHECK1: ifcont5:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK1-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
+// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK1-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK1-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]]
// CHECK1: then8:
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2
-// CHECK1-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2
-// CHECK1-NEXT: br label [[IFCONT8:%.*]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP16]], align 2
+// CHECK1-NEXT: store volatile i16 [[TMP18]], ptr addrspace(3) [[TMP17]], align 2
+// CHECK1-NEXT: br label [[IFCONT10:%.*]]
// CHECK1: else9:
-// CHECK1-NEXT: br label [[IFCONT8]]
+// CHECK1-NEXT: br label [[IFCONT10]]
// CHECK1: ifcont10:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
-// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
+// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]]
+// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]]
// CHECK1: then13:
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2
-// CHECK1-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
-// CHECK1-NEXT: br label [[IFCONT12:%.*]]
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = load volatile i16, ptr addrspace(3) [[TMP20]], align 2
+// CHECK1-NEXT: store i16 [[TMP23]], ptr [[TMP22]], align 2
+// CHECK1-NEXT: br label [[IFCONT15:%.*]]
// CHECK1: else14:
-// CHECK1-NEXT: br label [[IFCONT12]]
+// CHECK1-NEXT: br label [[IFCONT15]]
// CHECK1: ifcont15:
// CHECK1-NEXT: ret void
//
@@ -985,36 +1078,43 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func9
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT4]] to ptr
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK1-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK1-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK1-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
+// CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 4
// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i64 1
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i64 1
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
// CHECK1-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8
-// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1
// CHECK1-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2
// CHECK1-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32
@@ -1022,10 +1122,10 @@ int bar(int n){
// CHECK1-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
// CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]])
// CHECK1-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
-// CHECK1-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
+// CHECK1-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], align 2
// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i64 1
-// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i64 1
-// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], i64 1
+// CHECK1-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], ptr [[TMP20]], align 8
// CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK1-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK1-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -1040,7 +1140,7 @@ int bar(int n){
// CHECK1-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
// CHECK1-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
-// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
+// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR4]]
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
@@ -1050,13 +1150,13 @@ int bar(int n){
// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
// CHECK1-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK1: then5:
-// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK1-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 8
// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK1-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 8
// CHECK1-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4
// CHECK1-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4
-// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 1
// CHECK1-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 8
// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i64 0, i64 1
// CHECK1-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 8
@@ -1072,73 +1172,75 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func10
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK1-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: store volatile i32 [[TMP9]], ptr addrspace(3) [[TMP8]], align 4
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
// CHECK1: ifcont:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
-// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]]
+// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK1: then3:
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
-// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4
-// CHECK1-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
-// CHECK1-NEXT: br label [[IFCONT4:%.*]]
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load volatile i32, ptr addrspace(3) [[TMP11]], align 4
+// CHECK1-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4
+// CHECK1-NEXT: br label [[IFCONT5:%.*]]
// CHECK1: else4:
-// CHECK1-NEXT: br label [[IFCONT4]]
+// CHECK1-NEXT: br label [[IFCONT5]]
// CHECK1: ifcont5:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK1-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
+// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK1-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK1-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]]
// CHECK1: then8:
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8
-// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2
-// CHECK1-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2
-// CHECK1-NEXT: br label [[IFCONT8:%.*]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP16]], align 2
+// CHECK1-NEXT: store volatile i16 [[TMP18]], ptr addrspace(3) [[TMP17]], align 2
+// CHECK1-NEXT: br label [[IFCONT10:%.*]]
// CHECK1: else9:
-// CHECK1-NEXT: br label [[IFCONT8]]
+// CHECK1-NEXT: br label [[IFCONT10]]
// CHECK1: ifcont10:
-// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
-// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
+// CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]]
+// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]]
// CHECK1: then13:
-// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 8
-// CHECK1-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2
-// CHECK1-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
-// CHECK1-NEXT: br label [[IFCONT12:%.*]]
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = load volatile i16, ptr addrspace(3) [[TMP20]], align 2
+// CHECK1-NEXT: store i16 [[TMP23]], ptr [[TMP22]], align 2
+// CHECK1-NEXT: br label [[IFCONT15:%.*]]
// CHECK1: else14:
-// CHECK1-NEXT: br label [[IFCONT12]]
+// CHECK1-NEXT: br label [[IFCONT15]]
// CHECK1: ifcont15:
// CHECK1-NEXT: ret void
//
@@ -1146,128 +1248,147 @@ int bar(int n){
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func11
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK1-NEXT: store i32 [[TMP9]], ptr [[A]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP12]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2
-// CHECK1-NEXT: store i16 [[TMP13]], ptr [[B]], align 2
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP8]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP13]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP12]], align 2
+// CHECK1-NEXT: store i16 [[TMP15]], ptr [[TMP14]], align 2
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func12
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP6]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[A]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[B]], ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]]
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP6]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP9]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP11]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func13
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 0
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4
-// CHECK1-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4
-// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP12]], i32 0, i32 1
-// CHECK1-NEXT: [[TMP13:%.*]] = load i16, ptr [[B]], align 2
-// CHECK1-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP8]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[TMP7]], align 4
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP13]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP14]], align 2
+// CHECK1-NEXT: store i16 [[TMP15]], ptr [[TMP12]], align 2
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func14
// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8
-// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP6]], i32 0, i32 0
-// CHECK1-NEXT: store ptr [[A]], ptr [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 1
-// CHECK1-NEXT: store ptr [[B]], ptr [[TMP7]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 8
-// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK1-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK1-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP6]], i32 0, i32 0
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 1
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP9]], i32 0, i32 1
+// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP11]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR4]]
// CHECK1-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[E1:%.*]] = alloca double, align 8
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[E1:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr
+// CHECK2-NEXT: [[E1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E1]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 4, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK2-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8
-// CHECK2-NEXT: store double [[TMP3]], ptr [[E1]], align 8
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
+// CHECK2-NEXT: store double [[TMP3]], ptr [[E1_ASCAST]], align 8
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[E1_ASCAST]]) #[[ATTR4:[0-9]+]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1277,23 +1398,27 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META8]]
// CHECK2-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8)
// CHECK2-NEXT: store double 0.000000e+00, ptr [[E1]], align 8
// CHECK2-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8
// CHECK2-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00
// CHECK2-NEXT: store double [[ADD]], ptr [[E1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[E1]], ptr [[TMP2]], align 4
// CHECK2-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer()
-// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func)
+// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func)
// CHECK2-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 1
// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK2: .omp.reduction.then:
@@ -1310,32 +1435,38 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 4
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i32 1
// CHECK2-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8
// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK2-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK2-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
+// CHECK2-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 8
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i32 1
-// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i32 1
+// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 4
// CHECK2-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK2-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK2-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -1350,7 +1481,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
// CHECK2-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
-// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
+// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR4]]
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
@@ -1360,7 +1491,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
// CHECK2-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
// CHECK2: then4:
-// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 4
// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK2-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 4
@@ -1376,57 +1507,60 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
+// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK2-NEXT: br label [[PRECOND:%.*]]
// CHECK2: precond:
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2
-// CHECK2-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]]
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 2
+// CHECK2-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]]
// CHECK2: body:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]]
-// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK2-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP6]]
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
+// CHECK2-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
// CHECK2: ifcont:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]]
-// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]]
+// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK2: then3:
-// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]]
-// CHECK2-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4
-// CHECK2-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
-// CHECK2-NEXT: br label [[IFCONT4:%.*]]
+// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP6]]
+// CHECK2-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
+// CHECK2-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4
+// CHECK2-NEXT: br label [[IFCONT5:%.*]]
// CHECK2: else4:
-// CHECK2-NEXT: br label [[IFCONT4]]
+// CHECK2-NEXT: br label [[IFCONT5]]
// CHECK2: ifcont5:
-// CHECK2-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK2-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK2-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK2-NEXT: br label [[PRECOND]]
// CHECK2: exit:
// CHECK2-NEXT: ret void
@@ -1435,112 +1569,131 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8
-// CHECK2-NEXT: store double [[TMP9]], ptr [[E]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP7]], align 8
+// CHECK2-NEXT: store double [[TMP10]], ptr [[TMP9]], align 8
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[E]], ptr [[TMP5]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP7]]) #[[ATTR4]]
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP8]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP9:%.*]] = load double, ptr [[E]], align 8
-// CHECK2-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8
+// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP9]], align 8
+// CHECK2-NEXT: store double [[TMP10]], ptr [[TMP7]], align 8
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[E]], ptr [[TMP5]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP8]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[C:%.*]], i32 noundef [[D:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[D]], ptr [[D_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
-// CHECK2-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR]], align 1
+// CHECK2-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR_ASCAST]], align 1
// CHECK2-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1)
// CHECK2-NEXT: store i8 [[TMP1]], ptr [[C1]], align 1
-// CHECK2-NEXT: [[TMP2:%.*]] = load float, ptr [[D_ADDR]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load float, ptr [[D_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK2-NEXT: store float [[TMP2]], ptr [[D2]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4)
// CHECK2-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1)
// CHECK2-NEXT: call void @__kmpc_target_deinit()
@@ -1552,17 +1705,22 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK2-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META7]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META9:![0-9]+]]
// CHECK2-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1)
// CHECK2-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK2-NEXT: store i8 0, ptr [[C1]], align 1
@@ -1575,12 +1733,12 @@ int bar(int n){
// CHECK2-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01
// CHECK2-NEXT: store float [[MUL]], ptr [[D2]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[C1]], ptr [[TMP4]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: store ptr [[D2]], ptr [[TMP5]], align 4
// CHECK2-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer()
-// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2, ptr @_omp_reduction_list_to_global_copy_func3, ptr @_omp_reduction_list_to_global_reduce_func4, ptr @_omp_reduction_global_to_list_copy_func5, ptr @_omp_reduction_global_to_list_reduce_func6)
+// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2, ptr @_omp_reduction_list_to_global_copy_func3, ptr @_omp_reduction_list_to_global_reduce_func4, ptr @_omp_reduction_global_to_list_copy_func5, ptr @_omp_reduction_global_to_list_reduce_func6)
// CHECK2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
// CHECK2-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK2: .omp.reduction.then:
@@ -1605,24 +1763,31 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func1
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT4]] to ptr
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1
// CHECK2-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1
// CHECK2-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32
@@ -1630,22 +1795,22 @@ int bar(int n){
// CHECK2-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16
// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]])
// CHECK2-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
-// CHECK2-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1
+// CHECK2-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 1
// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1
-// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i32 1
+// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 4
// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 4
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i32 1
// CHECK2-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4
// CHECK2-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK2-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16
// CHECK2-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]])
-// CHECK2-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4
+// CHECK2-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], align 4
// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i32 1
-// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 4
+// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], i32 1
+// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], ptr [[TMP22]], align 4
// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK2-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK2-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -1660,7 +1825,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
// CHECK2-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
-// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
+// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR4]]
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
@@ -1670,13 +1835,13 @@ int bar(int n){
// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
// CHECK2-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK2: then5:
-// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
// CHECK2-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1
// CHECK2-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1
-// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK2-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
@@ -1692,73 +1857,75 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func2
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
-// CHECK2-NEXT: store volatile i8 [[TMP10]], ptr addrspace(3) [[TMP9]], align 1
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
+// CHECK2-NEXT: store volatile i8 [[TMP9]], ptr addrspace(3) [[TMP8]], align 1
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
// CHECK2: ifcont:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
-// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]]
+// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK2: then3:
-// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i8, ptr addrspace(3) [[TMP12]], align 1
-// CHECK2-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1
-// CHECK2-NEXT: br label [[IFCONT4:%.*]]
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load volatile i8, ptr addrspace(3) [[TMP11]], align 1
+// CHECK2-NEXT: store i8 [[TMP14]], ptr [[TMP13]], align 1
+// CHECK2-NEXT: br label [[IFCONT5:%.*]]
// CHECK2: else4:
-// CHECK2-NEXT: br label [[IFCONT4]]
+// CHECK2-NEXT: br label [[IFCONT5]]
// CHECK2: ifcont5:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK2-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK2-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
+// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK2-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK2-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]]
// CHECK2: then8:
-// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK2-NEXT: store volatile i32 [[TMP19]], ptr addrspace(3) [[TMP18]], align 4
-// CHECK2-NEXT: br label [[IFCONT8:%.*]]
+// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4
+// CHECK2-NEXT: store volatile i32 [[TMP18]], ptr addrspace(3) [[TMP17]], align 4
+// CHECK2-NEXT: br label [[IFCONT10:%.*]]
// CHECK2: else9:
-// CHECK2-NEXT: br label [[IFCONT8]]
+// CHECK2-NEXT: br label [[IFCONT10]]
// CHECK2: ifcont10:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
-// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
+// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]]
+// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]]
// CHECK2: then13:
-// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = load volatile i32, ptr addrspace(3) [[TMP21]], align 4
-// CHECK2-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4
-// CHECK2-NEXT: br label [[IFCONT12:%.*]]
+// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load volatile i32, ptr addrspace(3) [[TMP20]], align 4
+// CHECK2-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4
+// CHECK2-NEXT: br label [[IFCONT15:%.*]]
// CHECK2: else14:
-// CHECK2-NEXT: br label [[IFCONT12]]
+// CHECK2-NEXT: br label [[IFCONT15]]
// CHECK2: ifcont15:
// CHECK2-NEXT: ret void
//
@@ -1766,126 +1933,145 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func3
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-// CHECK2-NEXT: store i8 [[TMP9]], ptr [[C]], align 1
-// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP12]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4
-// CHECK2-NEXT: store float [[TMP13]], ptr [[D]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP8]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP7]], align 1
+// CHECK2-NEXT: store i8 [[TMP10]], ptr [[TMP9]], align 1
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP13]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP12]], align 4
+// CHECK2-NEXT: store float [[TMP15]], ptr [[TMP14]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func4
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[C]], ptr [[TMP5]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 1
-// CHECK2-NEXT: store ptr [[D]], ptr [[TMP7]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]]
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP6]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP9]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP11]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func5
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 1
-// CHECK2-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1
-// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP12]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP13:%.*]] = load float, ptr [[D]], align 4
-// CHECK2-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP8]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
+// CHECK2-NEXT: store i8 [[TMP10]], ptr [[TMP7]], align 1
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP13]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP14]], align 4
+// CHECK2-NEXT: store float [[TMP15]], ptr [[TMP12]], align 4
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func6
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[C]], ptr [[TMP5]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 1
-// CHECK2-NEXT: store ptr [[D]], ptr [[TMP7]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP6]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP9]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP11]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33
// CHECK2-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK2-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[B]], ptr [[B_ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]]
+// CHECK2-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[A_ADDR_ASCAST]], ptr [[B_ADDR_ASCAST]]) #[[ATTR4]]
// CHECK2-NEXT: call void @__kmpc_target_deinit()
// CHECK2-NEXT: ret void
// CHECK2: worker.exit:
@@ -1895,45 +2081,53 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[B2:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[A1]], align 4
-// CHECK2-NEXT: store i16 -32768, ptr [[B2]], align 2
-// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
-// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP3]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[B2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[A1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A1]] to ptr
+// CHECK2-NEXT: [[B2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B2]] to ptr
+// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META9]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META10:![0-9]+]]
+// CHECK2-NEXT: store i32 0, ptr [[A1_ASCAST]], align 4
+// CHECK2-NEXT: store i16 -32768, ptr [[B2_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[A1_ASCAST]], ptr [[TMP2]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[B2_ASCAST]], ptr [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP6]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP7]], align 4
+// CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[A1_ASCAST]], ptr [[TMP6]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[B2_ASCAST]], ptr [[TMP7]], align 4
// CHECK2-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer()
-// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func9, ptr @_omp_reduction_inter_warp_copy_func10, ptr @_omp_reduction_list_to_global_copy_func11, ptr @_omp_reduction_list_to_global_reduce_func12, ptr @_omp_reduction_global_to_list_copy_func13, ptr @_omp_reduction_global_to_list_reduce_func14)
+// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func9, ptr @_omp_reduction_inter_warp_copy_func10, ptr @_omp_reduction_list_to_global_copy_func11, ptr @_omp_reduction_list_to_global_reduce_func12, ptr @_omp_reduction_global_to_list_copy_func13, ptr @_omp_reduction_global_to_list_reduce_func14)
// CHECK2-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1
// CHECK2-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK2: .omp.reduction.then:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP10]], [[TMP11]]
// CHECK2-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4
// CHECK2-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP12]] to i32
-// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP13]] to i32
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]]
// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -1941,7 +2135,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP15:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK2-NEXT: [[TMP15:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i16 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
@@ -1954,53 +2148,60 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined
// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[B2:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK2-NEXT: store i32 0, ptr [[A1]], align 4
-// CHECK2-NEXT: store i16 -32768, ptr [[B2]], align 2
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[B2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK2-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK2-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK2-NEXT: [[A1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A1]] to ptr
+// CHECK2-NEXT: [[B2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B2]] to ptr
+// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META9]]
+// CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META10]]
+// CHECK2-NEXT: store i32 0, ptr [[A1_ASCAST]], align 4
+// CHECK2-NEXT: store i16 -32768, ptr [[B2_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1
-// CHECK2-NEXT: store i32 [[OR]], ptr [[A1]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK2-NEXT: store i32 [[OR]], ptr [[A1_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]]
// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
-// CHECK2-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK2-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ]
// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16
-// CHECK2-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2
-// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP5]], align 4
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK2-NEXT: store ptr [[B2]], ptr [[TMP6]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func7, ptr @_omp_reduction_inter_warp_copy_func8)
+// CHECK2-NEXT: store i16 [[CONV4]], ptr [[B2_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[A1_ASCAST]], ptr [[TMP5]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[B2_ASCAST]], ptr [[TMP6]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func7, ptr @_omp_reduction_inter_warp_copy_func8)
// CHECK2-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1
// CHECK2-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK2: .omp.reduction.then:
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
// CHECK2-NEXT: [[OR5:%.*]] = or i32 [[TMP9]], [[TMP10]]
// CHECK2-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4
// CHECK2-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK2-NEXT: [[CONV6:%.*]] = sext i16 [[TMP11]] to i32
-// CHECK2-NEXT: [[TMP12:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK2-NEXT: [[TMP12:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP12]] to i32
// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]]
// CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]]
@@ -2008,7 +2209,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK2-NEXT: br label [[COND_END11:%.*]]
// CHECK2: cond.false10:
-// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK2-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK2-NEXT: br label [[COND_END11]]
// CHECK2: cond.end11:
// CHECK2-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP13]], [[COND_TRUE9]] ], [ [[TMP14]], [[COND_FALSE10]] ]
@@ -2021,36 +2222,43 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func7
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT4]] to ptr
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK2-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
+// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 4
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
-// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i32 1
+// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 4
// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4
-// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2
// CHECK2-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32
@@ -2058,10 +2266,10 @@ int bar(int n){
// CHECK2-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
// CHECK2-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]])
// CHECK2-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
-// CHECK2-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
+// CHECK2-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], align 2
// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
-// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4
+// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], i32 1
+// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], ptr [[TMP20]], align 4
// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK2-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK2-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -2076,7 +2284,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
// CHECK2-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
-// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
+// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR4]]
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
@@ -2086,13 +2294,13 @@ int bar(int n){
// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
// CHECK2-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK2: then5:
-// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4
// CHECK2-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4
-// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK2-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
@@ -2108,73 +2316,75 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func8
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK2-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK2-NEXT: store volatile i32 [[TMP9]], ptr addrspace(3) [[TMP8]], align 4
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
// CHECK2: ifcont:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
-// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]]
+// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK2: then3:
-// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4
-// CHECK2-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
-// CHECK2-NEXT: br label [[IFCONT4:%.*]]
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load volatile i32, ptr addrspace(3) [[TMP11]], align 4
+// CHECK2-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4
+// CHECK2-NEXT: br label [[IFCONT5:%.*]]
// CHECK2: else4:
-// CHECK2-NEXT: br label [[IFCONT4]]
+// CHECK2-NEXT: br label [[IFCONT5]]
// CHECK2: ifcont5:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK2-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK2-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
+// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK2-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK2-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]]
// CHECK2: then8:
-// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2
-// CHECK2-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2
-// CHECK2-NEXT: br label [[IFCONT8:%.*]]
+// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK2-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP16]], align 2
+// CHECK2-NEXT: store volatile i16 [[TMP18]], ptr addrspace(3) [[TMP17]], align 2
+// CHECK2-NEXT: br label [[IFCONT10:%.*]]
// CHECK2: else9:
-// CHECK2-NEXT: br label [[IFCONT8]]
+// CHECK2-NEXT: br label [[IFCONT10]]
// CHECK2: ifcont10:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
-// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
+// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]]
+// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]]
// CHECK2: then13:
-// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2
-// CHECK2-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
-// CHECK2-NEXT: br label [[IFCONT12:%.*]]
+// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load volatile i16, ptr addrspace(3) [[TMP20]], align 2
+// CHECK2-NEXT: store i16 [[TMP23]], ptr [[TMP22]], align 2
+// CHECK2-NEXT: br label [[IFCONT15:%.*]]
// CHECK2: else14:
-// CHECK2-NEXT: br label [[IFCONT12]]
+// CHECK2-NEXT: br label [[IFCONT15]]
// CHECK2: ifcont15:
// CHECK2-NEXT: ret void
//
@@ -2182,36 +2392,43 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func9
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT4]] to ptr
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK2-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK2-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK2-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK2-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
+// CHECK2-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 4
// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
-// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i32 1
+// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 4
// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK2-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4
-// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
// CHECK2-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2
// CHECK2-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32
@@ -2219,10 +2436,10 @@ int bar(int n){
// CHECK2-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
// CHECK2-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]])
// CHECK2-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
-// CHECK2-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
+// CHECK2-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], align 2
// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
-// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4
+// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], i32 1
+// CHECK2-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], ptr [[TMP20]], align 4
// CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK2-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK2-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -2237,7 +2454,7 @@ int bar(int n){
// CHECK2-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
// CHECK2-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
-// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
+// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR4]]
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
@@ -2247,13 +2464,13 @@ int bar(int n){
// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
// CHECK2-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK2: then5:
-// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK2-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK2-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
// CHECK2-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4
// CHECK2-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4
-// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK2-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK2-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
@@ -2269,73 +2486,75 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func10
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK2: then:
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK2-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK2-NEXT: store volatile i32 [[TMP9]], ptr addrspace(3) [[TMP8]], align 4
// CHECK2-NEXT: br label [[IFCONT:%.*]]
// CHECK2: else:
// CHECK2-NEXT: br label [[IFCONT]]
// CHECK2: ifcont:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
-// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]]
+// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK2: then3:
-// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4
-// CHECK2-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
-// CHECK2-NEXT: br label [[IFCONT4:%.*]]
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
+// CHECK2-NEXT: [[TMP14:%.*]] = load volatile i32, ptr addrspace(3) [[TMP11]], align 4
+// CHECK2-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4
+// CHECK2-NEXT: br label [[IFCONT5:%.*]]
// CHECK2: else4:
-// CHECK2-NEXT: br label [[IFCONT4]]
+// CHECK2-NEXT: br label [[IFCONT5]]
// CHECK2: ifcont5:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK2-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK2-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
+// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK2-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK2-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]]
// CHECK2: then8:
-// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2
-// CHECK2-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2
-// CHECK2-NEXT: br label [[IFCONT8:%.*]]
+// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK2-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP16]], align 2
+// CHECK2-NEXT: store volatile i16 [[TMP18]], ptr addrspace(3) [[TMP17]], align 2
+// CHECK2-NEXT: br label [[IFCONT10:%.*]]
// CHECK2: else9:
-// CHECK2-NEXT: br label [[IFCONT8]]
+// CHECK2-NEXT: br label [[IFCONT10]]
// CHECK2: ifcont10:
-// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
-// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
+// CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]]
+// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]]
// CHECK2: then13:
-// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
-// CHECK2-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2
-// CHECK2-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
-// CHECK2-NEXT: br label [[IFCONT12:%.*]]
+// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = load volatile i16, ptr addrspace(3) [[TMP20]], align 2
+// CHECK2-NEXT: store i16 [[TMP23]], ptr [[TMP22]], align 2
+// CHECK2-NEXT: br label [[IFCONT15:%.*]]
// CHECK2: else14:
-// CHECK2-NEXT: br label [[IFCONT12]]
+// CHECK2-NEXT: br label [[IFCONT15]]
// CHECK2: ifcont15:
// CHECK2-NEXT: ret void
//
@@ -2343,128 +2562,147 @@ int bar(int n){
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func11
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK2-NEXT: store i32 [[TMP9]], ptr [[A]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP12]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2
-// CHECK2-NEXT: store i16 [[TMP13]], ptr [[B]], align 2
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP8]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP13]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP12]], align 2
+// CHECK2-NEXT: store i16 [[TMP15]], ptr [[TMP14]], align 2
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func12
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[A]], ptr [[TMP5]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 1
-// CHECK2-NEXT: store ptr [[B]], ptr [[TMP7]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]]
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP6]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP9]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP11]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func13
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4
-// CHECK2-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4
-// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
-// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP12]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP13:%.*]] = load i16, ptr [[B]], align 2
-// CHECK2-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP8]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK2-NEXT: store i32 [[TMP10]], ptr [[TMP7]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP13]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP14]], align 2
+// CHECK2-NEXT: store i16 [[TMP15]], ptr [[TMP12]], align 2
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func14
// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK2-NEXT: entry:
-// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP6]], i32 0, i32 0
-// CHECK2-NEXT: store ptr [[A]], ptr [[TMP5]], align 4
-// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 1
-// CHECK2-NEXT: store ptr [[B]], ptr [[TMP7]], align 4
-// CHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK2-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK2-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK2-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP6]], i32 0, i32 0
+// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP9]], i32 0, i32 1
+// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP11]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR4]]
// CHECK2-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[E1:%.*]] = alloca double, align 8
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[E1:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr
+// CHECK3-NEXT: [[E1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E1]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 4, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
// CHECK3-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8
-// CHECK3-NEXT: store double [[TMP3]], ptr [[E1]], align 8
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]]
+// CHECK3-NEXT: store double [[TMP3]], ptr [[E1_ASCAST]], align 8
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[E1_ASCAST]]) #[[ATTR4:[0-9]+]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -2474,23 +2712,27 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META8]]
// CHECK3-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8)
// CHECK3-NEXT: store double 0.000000e+00, ptr [[E1]], align 8
// CHECK3-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8
// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00
// CHECK3-NEXT: store double [[ADD]], ptr [[E1]], align 8
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[E1]], ptr [[TMP2]], align 4
// CHECK3-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer()
-// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 2048, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func)
+// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 2048, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func)
// CHECK3-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 1
// CHECK3-NEXT: br i1 [[TMP4]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK3: .omp.reduction.then:
@@ -2507,32 +2749,38 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3:[0-9]+]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i32 1
// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8
// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK3-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK3-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK3-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
+// CHECK3-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 8
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i32 1
-// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i32 1
+// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 4
// CHECK3-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK3-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK3-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -2547,7 +2795,7 @@ int bar(int n){
// CHECK3-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
// CHECK3-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
-// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
+// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR4]]
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
@@ -2557,7 +2805,7 @@ int bar(int n){
// CHECK3-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
// CHECK3-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
// CHECK3: then4:
-// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 4
// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 4
@@ -2573,57 +2821,60 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
+// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK3-NEXT: br label [[PRECOND:%.*]]
// CHECK3: precond:
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], 2
-// CHECK3-NEXT: br i1 [[TMP8]], label [[BODY:%.*]], label [[EXIT:%.*]]
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 2
+// CHECK3-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]]
// CHECK3: body:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]])
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 [[TMP7]]
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP11]], align 4
-// CHECK3-NEXT: store volatile i32 [[TMP13]], ptr addrspace(3) [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP6]]
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
+// CHECK3-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
// CHECK3: ifcont:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP14]]
-// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]]
+// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK3: then3:
-// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[TMP17]], i32 [[TMP7]]
-// CHECK3-NEXT: [[TMP19:%.*]] = load volatile i32, ptr addrspace(3) [[TMP15]], align 4
-// CHECK3-NEXT: store i32 [[TMP19]], ptr [[TMP18]], align 4
-// CHECK3-NEXT: br label [[IFCONT4:%.*]]
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP6]]
+// CHECK3-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
+// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4
+// CHECK3-NEXT: br label [[IFCONT5:%.*]]
// CHECK3: else4:
-// CHECK3-NEXT: br label [[IFCONT4]]
+// CHECK3-NEXT: br label [[IFCONT5]]
// CHECK3: ifcont5:
-// CHECK3-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK3-NEXT: store i32 [[TMP20]], ptr [[DOTCNT_ADDR]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK3-NEXT: br label [[PRECOND]]
// CHECK3: exit:
// CHECK3-NEXT: ret void
@@ -2632,112 +2883,131 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP7]], align 8
-// CHECK3-NEXT: store double [[TMP9]], ptr [[E]], align 8
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP7]], align 8
+// CHECK3-NEXT: store double [[TMP10]], ptr [[TMP9]], align 8
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[E]], ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP7]]) #[[ATTR4]]
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP8]]) #[[ATTR4]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP9:%.*]] = load double, ptr [[E]], align 8
-// CHECK3-NEXT: store double [[TMP9]], ptr [[TMP7]], align 8
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP9]], align 8
+// CHECK3-NEXT: store double [[TMP10]], ptr [[TMP7]], align 8
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[E]], ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP7]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP8]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR4]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[C:%.*]], i32 noundef [[D:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK3-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[D]], ptr [[D_ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
-// CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR]], align 1
+// CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR_ASCAST]], align 1
// CHECK3-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1)
// CHECK3-NEXT: store i8 [[TMP1]], ptr [[C1]], align 1
-// CHECK3-NEXT: [[TMP2:%.*]] = load float, ptr [[D_ADDR]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load float, ptr [[D_ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK3-NEXT: store float [[TMP2]], ptr [[D2]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]]
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4)
// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1)
// CHECK3-NEXT: call void @__kmpc_target_deinit()
@@ -2749,17 +3019,22 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK3-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 4, !nonnull [[META7]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META9:![0-9]+]]
// CHECK3-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1)
// CHECK3-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4)
// CHECK3-NEXT: store i8 0, ptr [[C1]], align 1
@@ -2772,12 +3047,12 @@ int bar(int n){
// CHECK3-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4
// CHECK3-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01
// CHECK3-NEXT: store float [[MUL]], ptr [[D2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: store ptr [[C1]], ptr [[TMP4]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: store ptr [[D2]], ptr [[TMP5]], align 4
// CHECK3-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer()
-// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 2048, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2, ptr @_omp_reduction_list_to_global_copy_func3, ptr @_omp_reduction_list_to_global_reduce_func4, ptr @_omp_reduction_global_to_list_copy_func5, ptr @_omp_reduction_global_to_list_reduce_func6)
+// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 2048, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2, ptr @_omp_reduction_list_to_global_copy_func3, ptr @_omp_reduction_list_to_global_reduce_func4, ptr @_omp_reduction_global_to_list_copy_func5, ptr @_omp_reduction_global_to_list_reduce_func6)
// CHECK3-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
// CHECK3-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK3: .omp.reduction.then:
@@ -2802,24 +3077,31 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func1
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT4]] to ptr
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1
// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1
// CHECK3-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32
@@ -2827,22 +3109,22 @@ int bar(int n){
// CHECK3-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16
// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]])
// CHECK3-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
-// CHECK3-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1
+// CHECK3-NEXT: store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 1
// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1
-// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i32 1
+// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 4
// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 4
-// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i32 1
// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4
// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK3-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16
// CHECK3-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]])
-// CHECK3-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4
+// CHECK3-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], align 4
// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i32 1
-// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 4
+// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], i32 1
+// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], ptr [[TMP22]], align 4
// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK3-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -2857,7 +3139,7 @@ int bar(int n){
// CHECK3-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
// CHECK3-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
-// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
+// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR4]]
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
@@ -2867,13 +3149,13 @@ int bar(int n){
// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
// CHECK3-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK3: then5:
-// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
// CHECK3-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1
// CHECK3-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1
-// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK3-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
@@ -2889,73 +3171,75 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func2
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK3-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP8]], align 1
-// CHECK3-NEXT: store volatile i8 [[TMP10]], ptr addrspace(3) [[TMP9]], align 1
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
+// CHECK3-NEXT: store volatile i8 [[TMP9]], ptr addrspace(3) [[TMP8]], align 1
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
// CHECK3: ifcont:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
-// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]]
+// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK3: then3:
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i8, ptr addrspace(3) [[TMP12]], align 1
-// CHECK3-NEXT: store i8 [[TMP15]], ptr [[TMP14]], align 1
-// CHECK3-NEXT: br label [[IFCONT4:%.*]]
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load volatile i8, ptr addrspace(3) [[TMP11]], align 1
+// CHECK3-NEXT: store i8 [[TMP14]], ptr [[TMP13]], align 1
+// CHECK3-NEXT: br label [[IFCONT5:%.*]]
// CHECK3: else4:
-// CHECK3-NEXT: br label [[IFCONT4]]
+// CHECK3-NEXT: br label [[IFCONT5]]
// CHECK3: ifcont5:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK3-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK3-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK3-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK3-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]]
// CHECK3: then8:
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4
-// CHECK3-NEXT: store volatile i32 [[TMP19]], ptr addrspace(3) [[TMP18]], align 4
-// CHECK3-NEXT: br label [[IFCONT8:%.*]]
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4
+// CHECK3-NEXT: store volatile i32 [[TMP18]], ptr addrspace(3) [[TMP17]], align 4
+// CHECK3-NEXT: br label [[IFCONT10:%.*]]
// CHECK3: else9:
-// CHECK3-NEXT: br label [[IFCONT8]]
+// CHECK3-NEXT: br label [[IFCONT10]]
// CHECK3: ifcont10:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
-// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]]
+// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]]
// CHECK3: then13:
-// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
-// CHECK3-NEXT: [[TMP24:%.*]] = load volatile i32, ptr addrspace(3) [[TMP21]], align 4
-// CHECK3-NEXT: store i32 [[TMP24]], ptr [[TMP23]], align 4
-// CHECK3-NEXT: br label [[IFCONT12:%.*]]
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = load volatile i32, ptr addrspace(3) [[TMP20]], align 4
+// CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4
+// CHECK3-NEXT: br label [[IFCONT15:%.*]]
// CHECK3: else14:
-// CHECK3-NEXT: br label [[IFCONT12]]
+// CHECK3-NEXT: br label [[IFCONT15]]
// CHECK3: ifcont15:
// CHECK3-NEXT: ret void
//
@@ -2963,126 +3247,145 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func3
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1
-// CHECK3-NEXT: store i8 [[TMP9]], ptr [[C]], align 1
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP12]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[TMP11]], align 4
-// CHECK3-NEXT: store float [[TMP13]], ptr [[D]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP8]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP7]], align 1
+// CHECK3-NEXT: store i8 [[TMP10]], ptr [[TMP9]], align 1
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP13]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP12]], align 4
+// CHECK3-NEXT: store float [[TMP15]], ptr [[TMP14]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func4
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[C]], ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[D]], ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]]
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP6]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP9]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP11]]) #[[ATTR4]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func5
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[C]], align 1
-// CHECK3-NEXT: store i8 [[TMP9]], ptr [[TMP7]], align 1
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP12]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP13:%.*]] = load float, ptr [[D]], align 4
-// CHECK3-NEXT: store float [[TMP13]], ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP8]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1
+// CHECK3-NEXT: store i8 [[TMP10]], ptr [[TMP7]], align 1
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP13]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP14]], align 4
+// CHECK3-NEXT: store float [[TMP15]], ptr [[TMP12]], align 4
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func6
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[C]], ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], ptr [[TMP8]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[D]], ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP6]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP9]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP11]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR4]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33
// CHECK3-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK3-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK3-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK3-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment, ptr [[DYN_PTR]])
// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK3: user_code.entry:
// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4
-// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]]
+// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], ptr [[A_ADDR_ASCAST]], ptr [[B_ADDR_ASCAST]]) #[[ATTR4]]
// CHECK3-NEXT: call void @__kmpc_target_deinit()
// CHECK3-NEXT: ret void
// CHECK3: worker.exit:
@@ -3092,45 +3395,53 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[A1]], align 4
-// CHECK3-NEXT: store i16 -32768, ptr [[B2]], align 2
-// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP3]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK3-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK3-NEXT: [[A1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A1]] to ptr
+// CHECK3-NEXT: [[B2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B2]] to ptr
+// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META9]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META10:![0-9]+]]
+// CHECK3-NEXT: store i32 0, ptr [[A1_ASCAST]], align 4
+// CHECK3-NEXT: store i16 -32768, ptr [[B2_ASCAST]], align 2
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[A1_ASCAST]], ptr [[TMP2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[B2_ASCAST]], ptr [[TMP3]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i32 2)
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i32 2)
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[A1_ASCAST]], ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[B2_ASCAST]], ptr [[TMP7]], align 4
// CHECK3-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer()
-// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 2048, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func9, ptr @_omp_reduction_inter_warp_copy_func10, ptr @_omp_reduction_list_to_global_copy_func11, ptr @_omp_reduction_list_to_global_reduce_func12, ptr @_omp_reduction_global_to_list_copy_func13, ptr @_omp_reduction_global_to_list_reduce_func14)
+// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 2048, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func9, ptr @_omp_reduction_inter_warp_copy_func10, ptr @_omp_reduction_list_to_global_copy_func11, ptr @_omp_reduction_list_to_global_reduce_func12, ptr @_omp_reduction_global_to_list_copy_func13, ptr @_omp_reduction_global_to_list_reduce_func14)
// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1
// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK3: .omp.reduction.then:
// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP10]], [[TMP11]]
// CHECK3-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4
// CHECK3-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP12]] to i32
-// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP13]] to i32
// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]]
// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
@@ -3138,7 +3449,7 @@ int bar(int n){
// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP15:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK3-NEXT: [[TMP15:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i16 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ]
@@ -3151,53 +3462,60 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined
// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
-// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
-// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4
-// CHECK3-NEXT: store i32 0, ptr [[A1]], align 4
-// CHECK3-NEXT: store i16 -32768, ptr [[B2]], align 2
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK3-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK3-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK3-NEXT: [[A1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A1]] to ptr
+// CHECK3-NEXT: [[B2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B2]] to ptr
+// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META9]]
+// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 4, !nonnull [[META7]], !align [[META10]]
+// CHECK3-NEXT: store i32 0, ptr [[A1_ASCAST]], align 4
+// CHECK3-NEXT: store i16 -32768, ptr [[B2_ASCAST]], align 2
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1
-// CHECK3-NEXT: store i32 [[OR]], ptr [[A1]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK3-NEXT: store i32 [[OR]], ptr [[A1_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32
// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]]
// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK3: cond.true:
// CHECK3-NEXT: br label [[COND_END:%.*]]
// CHECK3: cond.false:
-// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32
// CHECK3-NEXT: br label [[COND_END]]
// CHECK3: cond.end:
// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ]
// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16
-// CHECK3-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func7, ptr @_omp_reduction_inter_warp_copy_func8)
+// CHECK3-NEXT: store i16 [[CONV4]], ptr [[B2_ASCAST]], align 2
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[A1_ASCAST]], ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[B2_ASCAST]], ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func7, ptr @_omp_reduction_inter_warp_copy_func8)
// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1
// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK3: .omp.reduction.then:
// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1_ASCAST]], align 4
// CHECK3-NEXT: [[OR5:%.*]] = or i32 [[TMP9]], [[TMP10]]
// CHECK3-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4
// CHECK3-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP11]] to i32
-// CHECK3-NEXT: [[TMP12:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK3-NEXT: [[TMP12:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK3-NEXT: [[CONV7:%.*]] = sext i16 [[TMP12]] to i32
// CHECK3-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]]
// CHECK3-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]]
@@ -3205,7 +3523,7 @@ int bar(int n){
// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2
// CHECK3-NEXT: br label [[COND_END11:%.*]]
// CHECK3: cond.false10:
-// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2
+// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2_ASCAST]], align 2
// CHECK3-NEXT: br label [[COND_END11]]
// CHECK3: cond.end11:
// CHECK3-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP13]], [[COND_TRUE9]] ], [ [[TMP14]], [[COND_FALSE10]] ]
@@ -3218,36 +3536,43 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func7
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT4]] to ptr
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK3-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
+// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 4
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
-// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i32 1
+// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 4
// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2
// CHECK3-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32
@@ -3255,10 +3580,10 @@ int bar(int n){
// CHECK3-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]])
// CHECK3-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
-// CHECK3-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
+// CHECK3-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], align 2
// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
-// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4
+// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], i32 1
+// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], ptr [[TMP20]], align 4
// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK3-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -3273,7 +3598,7 @@ int bar(int n){
// CHECK3-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
// CHECK3-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
-// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
+// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR4]]
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
@@ -3283,13 +3608,13 @@ int bar(int n){
// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
// CHECK3-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK3: then5:
-// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4
// CHECK3-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4
-// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK3-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
@@ -3305,73 +3630,75 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func8
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK3-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: store volatile i32 [[TMP9]], ptr addrspace(3) [[TMP8]], align 4
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
// CHECK3: ifcont:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
-// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]]
+// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK3: then3:
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4
-// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
-// CHECK3-NEXT: br label [[IFCONT4:%.*]]
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load volatile i32, ptr addrspace(3) [[TMP11]], align 4
+// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4
+// CHECK3-NEXT: br label [[IFCONT5:%.*]]
// CHECK3: else4:
-// CHECK3-NEXT: br label [[IFCONT4]]
+// CHECK3-NEXT: br label [[IFCONT5]]
// CHECK3: ifcont5:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK3-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK3-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK3-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK3-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]]
// CHECK3: then8:
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2
-// CHECK3-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2
-// CHECK3-NEXT: br label [[IFCONT8:%.*]]
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK3-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP16]], align 2
+// CHECK3-NEXT: store volatile i16 [[TMP18]], ptr addrspace(3) [[TMP17]], align 2
+// CHECK3-NEXT: br label [[IFCONT10:%.*]]
// CHECK3: else9:
-// CHECK3-NEXT: br label [[IFCONT8]]
+// CHECK3-NEXT: br label [[IFCONT10]]
// CHECK3: ifcont10:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
-// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]]
+// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]]
// CHECK3: then13:
-// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
-// CHECK3-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2
-// CHECK3-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
-// CHECK3-NEXT: br label [[IFCONT12:%.*]]
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = load volatile i16, ptr addrspace(3) [[TMP20]], align 2
+// CHECK3-NEXT: store i16 [[TMP23]], ptr [[TMP22]], align 2
+// CHECK3-NEXT: br label [[IFCONT15:%.*]]
// CHECK3: else14:
-// CHECK3-NEXT: br label [[IFCONT12]]
+// CHECK3-NEXT: br label [[IFCONT15]]
// CHECK3: ifcont15:
// CHECK3-NEXT: ret void
//
@@ -3379,36 +3706,43 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func9
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT4]] to ptr
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4
// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK3-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4
+// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 4
// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1
-// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1
-// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i32 1
+// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 4
// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4
-// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2
// CHECK3-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32
@@ -3416,10 +3750,10 @@ int bar(int n){
// CHECK3-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16
// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]])
// CHECK3-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16
-// CHECK3-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2
+// CHECK3-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], align 2
// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1
-// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1
-// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4
+// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], i32 1
+// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4_ASCAST]], ptr [[TMP20]], align 4
// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK3-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -3434,7 +3768,7 @@ int bar(int n){
// CHECK3-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]]
// CHECK3-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
-// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]]
+// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR4]]
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
@@ -3444,13 +3778,13 @@ int bar(int n){
// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]]
// CHECK3-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]]
// CHECK3: then5:
-// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 0
// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4
// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0
// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4
// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4
// CHECK3-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4
-// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i32 0, i32 1
// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4
// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1
// CHECK3-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4
@@ -3466,73 +3800,75 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func10
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
-// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
-// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK3: then:
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP8]], align 4
-// CHECK3-NEXT: store volatile i32 [[TMP10]], ptr addrspace(3) [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: store volatile i32 [[TMP9]], ptr addrspace(3) [[TMP8]], align 4
// CHECK3-NEXT: br label [[IFCONT:%.*]]
// CHECK3: else:
// CHECK3-NEXT: br label [[IFCONT]]
// CHECK3: ifcont:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP11]]
-// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]]
+// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK3: then3:
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
-// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i32, ptr addrspace(3) [[TMP12]], align 4
-// CHECK3-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4
-// CHECK3-NEXT: br label [[IFCONT4:%.*]]
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = load volatile i32, ptr addrspace(3) [[TMP11]], align 4
+// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4
+// CHECK3-NEXT: br label [[IFCONT5:%.*]]
// CHECK3: else4:
-// CHECK3-NEXT: br label [[IFCONT4]]
+// CHECK3-NEXT: br label [[IFCONT5]]
// CHECK3: ifcont5:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK3-NEXT: [[WARP_MASTER5:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
-// CHECK3-NEXT: br i1 [[WARP_MASTER5]], label [[THEN6:%.*]], label [[ELSE7:%.*]]
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
+// CHECK3-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK3-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]]
// CHECK3: then8:
-// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4
-// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
-// CHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[TMP17]], align 2
-// CHECK3-NEXT: store volatile i16 [[TMP19]], ptr addrspace(3) [[TMP18]], align 2
-// CHECK3-NEXT: br label [[IFCONT8:%.*]]
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK3-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP16]], align 2
+// CHECK3-NEXT: store volatile i16 [[TMP18]], ptr addrspace(3) [[TMP17]], align 2
+// CHECK3-NEXT: br label [[IFCONT10:%.*]]
// CHECK3: else9:
-// CHECK3-NEXT: br label [[IFCONT8]]
+// CHECK3-NEXT: br label [[IFCONT10]]
// CHECK3: ifcont10:
-// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
-// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[TMP2]])
-// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[IS_ACTIVE_THREAD9:%.*]] = icmp ult i32 [[TMP3]], [[TMP20]]
-// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD9]], label [[THEN10:%.*]], label [[ELSE11:%.*]]
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]])
+// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]]
+// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]]
// CHECK3: then13:
-// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
-// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP6]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[TMP22]], align 4
-// CHECK3-NEXT: [[TMP24:%.*]] = load volatile i16, ptr addrspace(3) [[TMP21]], align 2
-// CHECK3-NEXT: store i16 [[TMP24]], ptr [[TMP23]], align 2
-// CHECK3-NEXT: br label [[IFCONT12:%.*]]
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = load volatile i16, ptr addrspace(3) [[TMP20]], align 2
+// CHECK3-NEXT: store i16 [[TMP23]], ptr [[TMP22]], align 2
+// CHECK3-NEXT: br label [[IFCONT15:%.*]]
// CHECK3: else14:
-// CHECK3-NEXT: br label [[IFCONT12]]
+// CHECK3-NEXT: br label [[IFCONT15]]
// CHECK3: ifcont15:
// CHECK3-NEXT: ret void
//
@@ -3540,103 +3876,117 @@ int bar(int n){
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func11
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4
-// CHECK3-NEXT: store i32 [[TMP9]], ptr [[A]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP12]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP11]], align 2
-// CHECK3-NEXT: store i16 [[TMP13]], ptr [[B]], align 2
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP8]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP7]], align 4
+// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP13]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP12]], align 2
+// CHECK3-NEXT: store i16 [[TMP15]], ptr [[TMP14]], align 2
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func12
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[A]], ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[B]], ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP9]]) #[[ATTR4]]
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP6]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP9]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP11]]) #[[ATTR4]]
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func13
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0
// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4
-// CHECK3-NEXT: store i32 [[TMP9]], ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4
-// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP4]], i32 [[TMP5]]
-// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP12]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[B]], align 2
-// CHECK3-NEXT: store i16 [[TMP13]], ptr [[TMP11]], align 2
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP8]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4
+// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP13]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP14]], align 2
+// CHECK3-NEXT: store i16 [[TMP15]], ptr [[TMP12]], align 2
// CHECK3-NEXT: ret void
//
//
// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func14
// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK3-NEXT: entry:
-// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4
-// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4
-// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4
-// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4
-// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
-// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP6]], i32 0, i32 0
-// CHECK3-NEXT: store ptr [[A]], ptr [[TMP5]], align 4
-// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1
-// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP3]], i32 [[TMP4]]
-// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], ptr [[TMP8]], i32 0, i32 1
-// CHECK3-NEXT: store ptr [[B]], ptr [[TMP7]], align 4
-// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTADDR2]], align 4
-// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP9]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]]
+// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4, addrspace(5)
+// CHECK3-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK3-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK3-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP6]], i32 0, i32 0
+// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP9]], i32 0, i32 1
+// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 4
+// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP11]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR4]]
// CHECK3-NEXT: ret void
//
diff --git a/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp b/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp
index d1d087ee887f3..0e83118bf5f33 100644
--- a/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp
@@ -34,14 +34,15 @@ struct T1 {
#pragma omp declare target
T a = T();
T f = a;
-// CHECK: define{{ hidden | }}void @{{.+}}foo{{.+}}(ptr noundef byval([[T]]) align {{.+}})
+// CHECK: define{{ hidden | }}void @{{.+}}foo{{.+}}(ptr addrspace(5) noundef byval([[T]]) align {{.+}})
void foo(T a = T()) {
return;
}
// CHECK: define{{ hidden | }}[6 x i64] @{{.+}}bar{{.+}}()
T bar() {
-// CHECK: [[RETVAL:%.+]] = alloca [[T]]
-// CHECK: load [6 x i64], ptr [[RETVAL]]
+// CHECK: [[RETVAL:%.+]] = alloca [[STRUCT_T:%.*]], align 16, addrspace(5)
+// CHECK: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK: load [6 x i64], ptr [[RETVAL_ASCAST]]
// CHECK-NEXT: ret [6 x i64]
return T();
}
@@ -53,7 +54,7 @@ void baz() {
}
T1 a1 = T1();
T1 f1 = a1;
-// CHECK: define{{ hidden | }}void @{{.+}}foo1{{.+}}(ptr noundef byval([[T1]]) align {{.+}})
+// CHECK: define{{ hidden | }}void @{{.+}}foo1{{.+}}(ptr addrspace(5) noundef byval([[T1]]) align {{.+}})
void foo1(T1 a = T1()) {
return;
}
diff --git a/clang/test/OpenMP/reduction_complex.c b/clang/test/OpenMP/reduction_complex.c
index e00caa8f90fdf..cb3da8654767e 100644
--- a/clang/test/OpenMP/reduction_complex.c
+++ b/clang/test/OpenMP/reduction_complex.c
@@ -24,73 +24,733 @@ int foo() {
return 0;
}
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19
+// CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[SUM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr
+// CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr
+// CHECK-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr
+// CHECK-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr
+// CHECK-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr
+// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19_kernel_environment, ptr [[DYN_PTR]])
+// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
+// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
+// CHECK: user_code.entry:
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[J_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP3]], ptr [[J_CASTED_ASCAST]], align 4
+// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8
+// CHECK-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4
+// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19_omp_outlined(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP4]], ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CHECK-NEXT: call void @__kmpc_target_deinit()
+// CHECK-NEXT: ret void
+// CHECK: worker.exit:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19_omp_outlined
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[SUM:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[SUM1:%.*]] = alloca { float, float }, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[_TMP2:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[J4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[J_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr
+// CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr
+// CHECK-NEXT: [[SUM1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM1]] to ptr
+// CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr
+// CHECK-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr
+// CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr
+// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-NEXT: [[J4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J4]] to ptr
+// CHECK-NEXT: [[J_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_CASTED]] to ptr
+// CHECK-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK-NEXT: [[SUM1_ASCAST_REALP:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[SUM1_ASCAST]], i32 0, i32 0
+// CHECK-NEXT: [[SUM1_ASCAST_IMAGP:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[SUM1_ASCAST]], i32 0, i32 1
+// CHECK-NEXT: store float 0.000000e+00, ptr [[SUM1_ASCAST_REALP]], align 4
+// CHECK-NEXT: store float 0.000000e+00, ptr [[SUM1_ASCAST_IMAGP]], align 4
+// CHECK-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-NEXT: store i32 99, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block()
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+// CHECK-NEXT: call void @__kmpc_distribute_static_init_4(ptr @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]])
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99
+// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK: cond.true:
+// CHECK-NEXT: br label [[COND_END:%.*]]
+// CHECK: cond.false:
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-NEXT: br label [[COND_END]]
+// CHECK: cond.end:
+// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ]
+// CHECK-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// CHECK: omp.inner.for.cond:
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP6]], 100
+// CHECK-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK: omp.inner.for.body:
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
+// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[J3_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP11]], ptr [[J_CASTED_ASCAST]], align 4
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[J_CASTED_ASCAST]], align 8
+// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
+// CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to ptr
+// CHECK-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8
+// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1
+// CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to ptr
+// CHECK-NEXT: store ptr [[TMP16]], ptr [[TMP15]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2
+// CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP12]] to ptr
+// CHECK-NEXT: store ptr [[TMP18]], ptr [[TMP17]], align 8
+// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3
+// CHECK-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP19]], align 8
+// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19_omp_outlined_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4)
+// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// CHECK: omp.inner.for.inc:
+// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// CHECK-NEXT: store i32 [[ADD]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
+// CHECK-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP25]]
+// CHECK-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[TMP26]], 99
+// CHECK-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]]
+// CHECK: cond.true9:
+// CHECK-NEXT: br label [[COND_END11:%.*]]
+// CHECK: cond.false10:
+// CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-NEXT: br label [[COND_END11]]
+// CHECK: cond.end11:
+// CHECK-NEXT: [[COND12:%.*]] = phi i32 [ 99, [[COND_TRUE9]] ], [ [[TMP27]], [[COND_FALSE10]] ]
+// CHECK-NEXT: store i32 [[COND12]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4
+// CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP28]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]]
+// CHECK: omp.inner.for.end:
+// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// CHECK: omp.loop.exit:
+// CHECK-NEXT: call void @__kmpc_distribute_static_fini(ptr @[[GLOB2]], i32 [[TMP2]])
+// CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0
+// CHECK-NEXT: br i1 [[TMP30]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
+// CHECK: .omp.lastprivate.then:
+// CHECK-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4
+// CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[J3_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP31]], ptr [[J_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
+// CHECK: .omp.lastprivate.done:
+// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP32]], align 8
+// CHECK-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer()
+// CHECK-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func)
+// CHECK-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP33]], 1
+// CHECK-NEXT: br i1 [[TMP34]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
+// CHECK: .omp.reduction.then:
+// CHECK-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[DOTREAL:%.*]] = load float, ptr [[DOTREALP]], align 4
+// CHECK-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[TMP0]], i32 0, i32 1
+// CHECK-NEXT: [[DOTIMAG:%.*]] = load float, ptr [[DOTIMAGP]], align 4
+// CHECK-NEXT: [[SUM1_ASCAST_REALP13:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[SUM1_ASCAST]], i32 0, i32 0
+// CHECK-NEXT: [[SUM1_ASCAST_REAL:%.*]] = load float, ptr [[SUM1_ASCAST_REALP13]], align 4
+// CHECK-NEXT: [[SUM1_ASCAST_IMAGP14:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[SUM1_ASCAST]], i32 0, i32 1
+// CHECK-NEXT: [[SUM1_ASCAST_IMAG:%.*]] = load float, ptr [[SUM1_ASCAST_IMAGP14]], align 4
+// CHECK-NEXT: [[ADD_R:%.*]] = fadd float [[DOTREAL]], [[SUM1_ASCAST_REAL]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[DOTIMAG]], [[SUM1_ASCAST_IMAG]]
+// CHECK-NEXT: [[DOTREALP15:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[DOTIMAGP16:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[TMP0]], i32 0, i32 1
+// CHECK-NEXT: store float [[ADD_R]], ptr [[DOTREALP15]], align 4
+// CHECK-NEXT: store float [[ADD_I]], ptr [[DOTIMAGP16]], align 4
+// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
+// CHECK: .omp.reduction.done:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19_omp_outlined_omp_outlined
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[J:%.*]], ptr noundef nonnull align 4 dereferenceable(8) [[SUM:%.*]]) #[[ATTR1]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[J_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK-NEXT: [[SUM_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[_TMP1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[J3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[SUM4:%.*]] = alloca { float, float }, align 4, addrspace(5)
+// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[J5:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr
+// CHECK-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr
+// CHECK-NEXT: [[J_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J_ADDR]] to ptr
+// CHECK-NEXT: [[SUM_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM_ADDR]] to ptr
+// CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK-NEXT: [[J3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J3]] to ptr
+// CHECK-NEXT: [[SUM4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SUM4]] to ptr
+// CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK-NEXT: [[J5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[J5]] to ptr
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store i64 [[J]], ptr [[J_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[SUM]], ptr [[SUM_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[SUM_ADDR_ASCAST]], align 8, !nonnull [[META5]], !align [[META6]]
+// CHECK-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-NEXT: store i32 99, ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP2]] to i32
+// CHECK-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[CONV2]], ptr [[DOTOMP_UB_ASCAST]], align 4
+// CHECK-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4
+// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-NEXT: [[SUM4_ASCAST_REALP:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[SUM4_ASCAST]], i32 0, i32 0
+// CHECK-NEXT: [[SUM4_ASCAST_IMAGP:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[SUM4_ASCAST]], i32 0, i32 1
+// CHECK-NEXT: store float 0.000000e+00, ptr [[SUM4_ASCAST_REALP]], align 4
+// CHECK-NEXT: store float 0.000000e+00, ptr [[SUM4_ASCAST_IMAGP]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP4]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1)
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV_ASCAST]], align 4
+// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// CHECK: omp.inner.for.cond:
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7:![0-9]+]]
+// CHECK-NEXT: [[CONV6:%.*]] = sext i32 [[TMP6]] to i64
+// CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8, !llvm.access.group [[ACC_GRP7]]
+// CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV6]], [[TMP7]]
+// CHECK-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// CHECK: omp.inner.for.body:
+// CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10
+// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK-NEXT: [[DIV7:%.*]] = sdiv i32 [[TMP10]], 10
+// CHECK-NEXT: [[MUL8:%.*]] = mul nsw i32 [[DIV7]], 10
+// CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL8]]
+// CHECK-NEXT: [[MUL9:%.*]] = mul nsw i32 [[SUB]], 1
+// CHECK-NEXT: [[ADD10:%.*]] = add nsw i32 0, [[MUL9]]
+// CHECK-NEXT: store i32 [[ADD10]], ptr [[J3_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP11]] to float
+// CHECK-NEXT: [[SUM4_ASCAST_REALP12:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[SUM4_ASCAST]], i32 0, i32 0
+// CHECK-NEXT: [[SUM4_ASCAST_REAL:%.*]] = load float, ptr [[SUM4_ASCAST_REALP12]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK-NEXT: [[SUM4_ASCAST_IMAGP13:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[SUM4_ASCAST]], i32 0, i32 1
+// CHECK-NEXT: [[SUM4_ASCAST_IMAG:%.*]] = load float, ptr [[SUM4_ASCAST_IMAGP13]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK-NEXT: [[ADD_R:%.*]] = fadd float [[SUM4_ASCAST_REAL]], [[CONV11]]
+// CHECK-NEXT: [[SUM4_ASCAST_REALP14:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[SUM4_ASCAST]], i32 0, i32 0
+// CHECK-NEXT: [[SUM4_ASCAST_IMAGP15:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[SUM4_ASCAST]], i32 0, i32 1
+// CHECK-NEXT: store float [[ADD_R]], ptr [[SUM4_ASCAST_REALP14]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK-NEXT: store float [[SUM4_ASCAST_IMAG]], ptr [[SUM4_ASCAST_IMAGP15]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// CHECK: omp.body.continue:
+// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// CHECK: omp.inner.for.inc:
+// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP12]], [[TMP13]]
+// CHECK-NEXT: store i32 [[ADD16]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP7]]
+// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK: omp.inner.for.end:
+// CHECK-NEXT: br label [[OMP_LOOP_EXIT:%.*]]
+// CHECK: omp.loop.exit:
+// CHECK-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB3]], i32 [[TMP4]])
+// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK-NEXT: store ptr [[SUM4_ASCAST]], ptr [[TMP14]], align 8
+// CHECK-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
+// CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1
+// CHECK-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
+// CHECK: .omp.reduction.then:
+// CHECK-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[DOTREAL:%.*]] = load float, ptr [[DOTREALP]], align 4
+// CHECK-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[TMP0]], i32 0, i32 1
+// CHECK-NEXT: [[DOTIMAG:%.*]] = load float, ptr [[DOTIMAGP]], align 4
+// CHECK-NEXT: [[SUM4_ASCAST_REALP17:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[SUM4_ASCAST]], i32 0, i32 0
+// CHECK-NEXT: [[SUM4_ASCAST_REAL18:%.*]] = load float, ptr [[SUM4_ASCAST_REALP17]], align 4
+// CHECK-NEXT: [[SUM4_ASCAST_IMAGP19:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[SUM4_ASCAST]], i32 0, i32 1
+// CHECK-NEXT: [[SUM4_ASCAST_IMAG20:%.*]] = load float, ptr [[SUM4_ASCAST_IMAGP19]], align 4
+// CHECK-NEXT: [[ADD_R21:%.*]] = fadd float [[DOTREAL]], [[SUM4_ASCAST_REAL18]]
+// CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[DOTIMAG]], [[SUM4_ASCAST_IMAG20]]
+// CHECK-NEXT: [[DOTREALP22:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[DOTIMAGP23:%.*]] = getelementptr inbounds nuw { float, float }, ptr [[TMP0]], i32 0, i32 1
+// CHECK-NEXT: store float [[ADD_R21]], ptr [[DOTREALP22]], align 4
+// CHECK-NEXT: store float [[ADD_I]], ptr [[DOTIMAGP23]], align 4
+// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
+// CHECK: .omp.reduction.done:
+// CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4
+// CHECK-NEXT: [[TMP18:%.*]] = icmp ne i32 [[TMP17]], 0
+// CHECK-NEXT: br i1 [[TMP18]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]]
+// CHECK: .omp.lastprivate.then:
+// CHECK-NEXT: store i32 10, ptr [[J3_ASCAST]], align 4
+// CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[J3_ASCAST]], align 4
+// CHECK-NEXT: store i32 [[TMP19]], ptr [[J_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]]
+// CHECK: .omp.lastprivate.done:
+// CHECK-NEXT: ret void
+//
+//
// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
-// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca { float, float }, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
+// CHECK-NEXT: [[TMP11:%.*]] = getelementptr { float, float }, ptr [[TMP9]], i64 1
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8
+// CHECK-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
+// CHECK-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
+// CHECK-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 8
+// CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i64 1
+// CHECK-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i64 1
+// CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8
+// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0
+// CHECK-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
+// CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]]
+// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2
+// CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1
+// CHECK-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0
+// CHECK-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]]
+// CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0
+// CHECK-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]]
+// CHECK-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]]
+// CHECK-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
+// CHECK-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK: then:
+// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]]
+// CHECK-NEXT: br label [[IFCONT:%.*]]
+// CHECK: else:
+// CHECK-NEXT: br label [[IFCONT]]
+// CHECK: ifcont:
+// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
+// CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
+// CHECK-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE7:%.*]]
+// CHECK: then4:
+// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
+// CHECK-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8
+// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8
+// CHECK-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP34]], i32 0, i32 0
+// CHECK-NEXT: [[DOTREAL:%.*]] = load float, ptr [[DOTREALP]], align 4
+// CHECK-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP34]], i32 0, i32 1
+// CHECK-NEXT: [[DOTIMAG:%.*]] = load float, ptr [[DOTIMAGP]], align 4
+// CHECK-NEXT: [[DOTREALP5:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP36]], i32 0, i32 0
+// CHECK-NEXT: [[DOTIMAGP6:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP36]], i32 0, i32 1
+// CHECK-NEXT: store float [[DOTREAL]], ptr [[DOTREALP5]], align 4
+// CHECK-NEXT: store float [[DOTIMAG]], ptr [[DOTIMAGP6]], align 4
+// CHECK-NEXT: br label [[IFCONT8:%.*]]
+// CHECK: else7:
+// CHECK-NEXT: br label [[IFCONT8]]
+// CHECK: ifcont8:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func
+// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
+// CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label [[PRECOND:%.*]]
+// CHECK: precond:
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 2
+// CHECK-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]]
+// CHECK: body:
+// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+// CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK: then:
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP6]]
+// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
+// CHECK-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
+// CHECK-NEXT: br label [[IFCONT:%.*]]
+// CHECK: else:
+// CHECK-NEXT: br label [[IFCONT]]
+// CHECK: ifcont:
+// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]]
+// CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
+// CHECK: then3:
+// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP6]]
+// CHECK-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
+// CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4
+// CHECK-NEXT: br label [[IFCONT5:%.*]]
+// CHECK: else4:
+// CHECK-NEXT: br label [[IFCONT5]]
+// CHECK: ifcont5:
+// CHECK-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label [[PRECOND]]
+// CHECK: exit:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func1
+// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca { float, float }, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
+// CHECK-NEXT: [[TMP11:%.*]] = getelementptr { float, float }, ptr [[TMP9]], i64 1
+// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8
+// CHECK-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
+// CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
+// CHECK-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
+// CHECK-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 8
+// CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i64 1
+// CHECK-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i64 1
+// CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8
+// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0
+// CHECK-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
+// CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]]
+// CHECK-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2
+// CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1
+// CHECK-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0
+// CHECK-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]]
+// CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0
+// CHECK-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]]
+// CHECK-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]]
+// CHECK-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
+// CHECK-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK: then:
+// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR2]]
+// CHECK-NEXT: br label [[IFCONT:%.*]]
+// CHECK: else:
+// CHECK-NEXT: br label [[IFCONT]]
+// CHECK: ifcont:
+// CHECK-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1
+// CHECK-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]]
+// CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
+// CHECK-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE7:%.*]]
+// CHECK: then4:
+// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
+// CHECK-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8
+// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
+// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8
+// CHECK-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP34]], i32 0, i32 0
+// CHECK-NEXT: [[DOTREAL:%.*]] = load float, ptr [[DOTREALP]], align 4
+// CHECK-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP34]], i32 0, i32 1
+// CHECK-NEXT: [[DOTIMAG:%.*]] = load float, ptr [[DOTIMAGP]], align 4
+// CHECK-NEXT: [[DOTREALP5:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP36]], i32 0, i32 0
+// CHECK-NEXT: [[DOTIMAGP6:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP36]], i32 0, i32 1
+// CHECK-NEXT: store float [[DOTREAL]], ptr [[DOTREALP5]], align 4
+// CHECK-NEXT: store float [[DOTIMAG]], ptr [[DOTIMAGP6]], align 4
+// CHECK-NEXT: br label [[IFCONT8:%.*]]
+// CHECK: else7:
+// CHECK-NEXT: br label [[IFCONT8]]
+// CHECK: ifcont8:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func2
+// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
+// CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+// CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label [[PRECOND:%.*]]
+// CHECK: precond:
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4
+// CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 2
+// CHECK-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]]
+// CHECK: body:
+// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+// CHECK-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
+// CHECK-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
+// CHECK: then:
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP6]]
+// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
+// CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4
+// CHECK-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4
+// CHECK-NEXT: br label [[IFCONT:%.*]]
+// CHECK: else:
+// CHECK-NEXT: br label [[IFCONT]]
+// CHECK: ifcont:
+// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
+// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB4]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]]
+// CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
+// CHECK: then3:
+// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]]
+// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i64 0, i64 0
+// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP6]]
+// CHECK-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4
+// CHECK-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4
+// CHECK-NEXT: br label [[IFCONT5:%.*]]
+// CHECK: else4:
+// CHECK-NEXT: br label [[IFCONT5]]
+// CHECK: ifcont5:
+// CHECK-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1
+// CHECK-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR_ASCAST]], align 4
+// CHECK-NEXT: br label [[PRECOND]]
+// CHECK: exit:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func
+// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0
+// CHECK-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP7]], i32 0, i32 0
+// CHECK-NEXT: [[DOTREAL:%.*]] = load float, ptr [[DOTREALP]], align 4
+// CHECK-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP7]], i32 0, i32 1
+// CHECK-NEXT: [[DOTIMAG:%.*]] = load float, ptr [[DOTIMAGP]], align 4
+// CHECK-NEXT: [[DOTREALP3:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP9]], i32 0, i32 0
+// CHECK-NEXT: [[DOTIMAGP4:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP9]], i32 0, i32 1
+// CHECK-NEXT: store float [[DOTREAL]], ptr [[DOTREALP3]], align 4
+// CHECK-NEXT: store float [[DOTIMAG]], ptr [[DOTIMAGP4]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func
+// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP8]]) #[[ATTR2]]
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func
+// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i64 0, i64 0
+// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]]
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0
+// CHECK-NEXT: [[DOTREALP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP9]], i32 0, i32 0
+// CHECK-NEXT: [[DOTREAL:%.*]] = load float, ptr [[DOTREALP]], align 4
+// CHECK-NEXT: [[DOTIMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP9]], i32 0, i32 1
+// CHECK-NEXT: [[DOTIMAG:%.*]] = load float, ptr [[DOTIMAGP]], align 4
+// CHECK-NEXT: [[DOTREALP3:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP7]], i32 0, i32 0
+// CHECK-NEXT: [[DOTIMAGP4:%.*]] = getelementptr inbounds { float, float }, ptr [[TMP7]], i32 0, i32 1
+// CHECK-NEXT: store float [[DOTREAL]], ptr [[DOTREALP3]], align 4
+// CHECK-NEXT: store float [[DOTIMAG]], ptr [[DOTIMAGP4]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func
+// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: %[[VAL_228:.*]] = alloca ptr, align 8
-// CHECK-NEXT: %[[VAL_229:.*]] = alloca i16, align 2
-// CHECK-NEXT: %[[VAL_230:.*]] = alloca i16, align 2
-// CHECK-NEXT: %[[VAL_231:.*]] = alloca i16, align 2
-// CHECK-NEXT: %[[VAL_232:.*]] = alloca [1 x ptr], align 8
-// CHECK-NEXT: %[[VAL_233:.*]] = alloca { float, float }, align 8
-// CHECK-NEXT: store ptr %[[VAL_234:.*]], ptr %[[VAL_228]], align 8
-// CHECK-NEXT: store i16 %[[VAL_235:.*]], ptr %[[VAL_229]], align 2
-// CHECK-NEXT: store i16 %[[VAL_236:.*]], ptr %[[VAL_230]], align 2
-// CHECK-NEXT: store i16 %[[VAL_237:.*]], ptr %[[VAL_231]], align 2
-// CHECK-NEXT: %[[VAL_238:.*]] = load ptr, ptr %[[VAL_228]], align 8
-// CHECK-NEXT: %[[VAL_239:.*]] = load i16, ptr %[[VAL_229]], align 2
-// CHECK-NEXT: %[[VAL_240:.*]] = load i16, ptr %[[VAL_230]], align 2
-// CHECK-NEXT: %[[VAL_241:.*]] = load i16, ptr %[[VAL_231]], align 2
-// CHECK-NEXT: %[[VAL_242:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_238]], i64 0, i64 0
-// CHECK-NEXT: %[[VAL_243:.*]] = load ptr, ptr %[[VAL_242]], align 8
-// CHECK-NEXT: %[[VAL_244:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_232]], i64 0, i64 0
-// CHECK-NEXT: %[[VAL_245:.*]] = getelementptr { float, float }, ptr %[[VAL_243]], i64 1
-// CHECK-NEXT: %[[VAL_246:.*]] = load i64, ptr %[[VAL_243]], align 8
-// CHECK-NEXT: %[[VAL_247:.*]] = call i32 @__kmpc_get_warp_size()
-// CHECK-NEXT: %[[VAL_248:.*]] = trunc i32 %[[VAL_247]] to i16
-// CHECK-NEXT: %[[VAL_249:.*]] = call i64 @__kmpc_shuffle_int64(i64 %[[VAL_246]], i16 %[[VAL_240]], i16 %[[VAL_248]])
-// CHECK-NEXT: store i64 %[[VAL_249]], ptr %[[VAL_233]], align 8
-// CHECK-NEXT: %[[VAL_250:.*]] = getelementptr i64, ptr %[[VAL_243]], i64 1
-// CHECK-NEXT: %[[VAL_251:.*]] = getelementptr i64, ptr %[[VAL_233]], i64 1
-// CHECK-NEXT: store ptr %[[VAL_233]], ptr %[[VAL_244]], align 8
-// CHECK-NEXT: %[[VAL_252:.*]] = icmp eq i16 %[[VAL_241]], 0
-// CHECK-NEXT: %[[VAL_253:.*]] = icmp eq i16 %[[VAL_241]], 1
-// CHECK-NEXT: %[[VAL_254:.*]] = icmp ult i16 %[[VAL_239]], %[[VAL_240]]
-// CHECK-NEXT: %[[VAL_255:.*]] = and i1 %[[VAL_253]], %[[VAL_254]]
-// CHECK-NEXT: %[[VAL_256:.*]] = icmp eq i16 %[[VAL_241]], 2
-// CHECK-NEXT: %[[VAL_257:.*]] = and i16 %[[VAL_239]], 1
-// CHECK-NEXT: %[[VAL_258:.*]] = icmp eq i16 %[[VAL_257]], 0
-// CHECK-NEXT: %[[VAL_259:.*]] = and i1 %[[VAL_256]], %[[VAL_258]]
-// CHECK-NEXT: %[[VAL_260:.*]] = icmp sgt i16 %[[VAL_240]], 0
-// CHECK-NEXT: %[[VAL_261:.*]] = and i1 %[[VAL_259]], %[[VAL_260]]
-// CHECK-NEXT: %[[VAL_262:.*]] = or i1 %[[VAL_252]], %[[VAL_255]]
-// CHECK-NEXT: %[[VAL_263:.*]] = or i1 %[[VAL_262]], %[[VAL_261]]
-// CHECK-NEXT: br i1 %[[VAL_263]], label %[[VAL_264:.*]], label %[[VAL_265:.*]]
-// CHECK: then: ; preds = %[[VAL_266:.*]]
-// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l{{[0-9]+}}_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr %[[VAL_238]], ptr %[[VAL_232]]) #2
-// CHECK-NEXT: br label %[[VAL_267:.*]]
-// CHECK: else: ; preds = %[[VAL_266]]
-// CHECK-NEXT: br label %[[VAL_267]]
-// CHECK: ifcont: ; preds = %[[VAL_265]], %[[VAL_264]]
-// CHECK-NEXT: %[[VAL_268:.*]] = icmp eq i16 %[[VAL_241]], 1
-// CHECK-NEXT: %[[VAL_269:.*]] = icmp uge i16 %[[VAL_239]], %[[VAL_240]]
-// CHECK-NEXT: %[[VAL_270:.*]] = and i1 %[[VAL_268]], %[[VAL_269]]
-// CHECK-NEXT: br i1 %[[VAL_270]], label %[[VAL_271:.*]], label %[[VAL_272:.*]]
-// CHECK: then4: ; preds = %[[VAL_267]]
-// CHECK-NEXT: %[[VAL_273:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_232]], i64 0, i64 0
-// CHECK-NEXT: %[[VAL_274:.*]] = load ptr, ptr %[[VAL_273]], align 8
-// CHECK-NEXT: %[[VAL_275:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_238]], i64 0, i64 0
-// CHECK-NEXT: %[[VAL_276:.*]] = load ptr, ptr %[[VAL_275]], align 8
-// CHECK-NEXT: %[[VAL_277:.*]] = getelementptr inbounds { float, float }, ptr %[[VAL_274]], i32 0, i32 0
-// CHECK-NEXT: %[[VAL_278:.*]] = load float, ptr %[[VAL_277]], align 4
-// CHECK-NEXT: %[[VAL_279:.*]] = getelementptr inbounds { float, float }, ptr %[[VAL_274]], i32 0, i32 1
-// CHECK-NEXT: %[[VAL_280:.*]] = load float, ptr %[[VAL_279]], align 4
-// CHECK-NEXT: %[[VAL_281:.*]] = getelementptr inbounds { float, float }, ptr %[[VAL_276]], i32 0, i32 0
-// CHECK-NEXT: %[[VAL_282:.*]] = getelementptr inbounds { float, float }, ptr %[[VAL_276]], i32 0, i32 1
-// CHECK-NEXT: store float %[[VAL_278]], ptr %[[VAL_281]], align 4
-// CHECK-NEXT: store float %[[VAL_280]], ptr %[[VAL_282]], align 4
-// CHECK-NEXT: br label %[[VAL_283:.*]]
-// CHECK: else7: ; preds = %[[VAL_267]]
-// CHECK-NEXT: br label %[[VAL_283]]
-// CHECK: ifcont8: ; preds = %[[VAL_272]], %[[VAL_271]]
-// CHECK-NEXT: ret void
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]]
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8
+// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP8]], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]]) #[[ATTR2]]
+// CHECK-NEXT: ret void
+//
diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp
index a7db3da7d1f86..aa748ad22f4ed 100644
--- a/clang/test/OpenMP/reduction_implicit_map.cpp
+++ b/clang/test/OpenMP/reduction_implicit_map.cpp
@@ -100,20 +100,23 @@ int main()
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32
// CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef [[E:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8
-// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
+// CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr
+// CHECK-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_kernel_environment, ptr [[DYN_PTR]])
// CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK: user_code.entry:
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8
-// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0
// CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP3]], align 8
-// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 1)
+// CHECK-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 1)
// CHECK-NEXT: call void @__kmpc_target_deinit()
// CHECK-NEXT: ret void
// CHECK: worker.exit:
@@ -123,37 +126,43 @@ int main()
// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined
// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef [[E:%.*]]) #[[ATTR1:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[E2:%.*]] = alloca double, align 8
-// CHECK-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 8
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[E2:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr
+// CHECK-NEXT: [[E2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E2]] to ptr
+// CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw double, ptr [[TMP0]], i64 0
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw double, ptr [[TMP1]], i64 0
-// CHECK-NEXT: store double 0.000000e+00, ptr [[E2]], align 8
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR]], align 8
+// CHECK-NEXT: store double 0.000000e+00, ptr [[E2_ASCAST]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64
// CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64
// CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[TMP3]], [[TMP4]]
// CHECK-NEXT: [[TMP6:%.*]] = sdiv exact i64 [[TMP5]], ptrtoint (ptr getelementptr (double, ptr null, i32 1) to i64)
-// CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[E2]], i64 [[TMP6]]
-// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP]], align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[E2_ASCAST]], i64 [[TMP6]]
+// CHECK-NEXT: store ptr [[TMP7]], ptr [[TMP_ASCAST]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8
// CHECK-NEXT: store double 1.000000e+01, ptr [[TMP8]], align 8
-// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
-// CHECK-NEXT: store ptr [[E2]], ptr [[TMP9]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0
+// CHECK-NEXT: store ptr [[E2_ASCAST]], ptr [[TMP9]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func)
// CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1
// CHECK-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK: .omp.reduction.then:
// CHECK-NEXT: [[TMP12:%.*]] = load double, ptr [[ARRAYIDX]], align 8
-// CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[E2]], align 8
+// CHECK-NEXT: [[TMP13:%.*]] = load double, ptr [[E2_ASCAST]], align 8
// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[TMP13]]
// CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX]], align 8
// CHECK-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
@@ -164,32 +173,38 @@ int main()
// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR2:[0-9]+]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2
-// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8
-// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8
-// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2
-// CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2
-// CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2
-// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2
-// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2
-// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTOMP_REDUCTION_ELEMENT_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_ELEMENT]] to ptr
+// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr
+// CHECK-NEXT: [[DOTADDR3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR3]] to ptr
+// CHECK-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3_ASCAST]], align 2
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1_ASCAST]], align 2
+// CHECK-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2_ASCAST]], align 2
+// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3_ASCAST]], align 2
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8
-// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i64 1
// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8
// CHECK-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size()
// CHECK-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
// CHECK-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]])
-// CHECK-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8
+// CHECK-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], align 8
// CHECK-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i64 1
-// CHECK-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i64 1
-// CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], i64 1
+// CHECK-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT_ASCAST]], ptr [[TMP10]], align 8
// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0
// CHECK-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1
// CHECK-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]]
@@ -204,7 +219,7 @@ int main()
// CHECK-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]]
// CHECK-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK: then:
-// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l32_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]]) #[[ATTR3:[0-9]+]]
// CHECK-NEXT: br label [[IFCONT:%.*]]
// CHECK: else:
// CHECK-NEXT: br label [[IFCONT]]
@@ -214,7 +229,7 @@ int main()
// CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]]
// CHECK-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]]
// CHECK: then4:
-// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0
+// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST_ASCAST]], i64 0, i64 0
// CHECK-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8
// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i64 0, i64 0
// CHECK-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 8
@@ -230,21 +245,24 @@ int main()
// CHECK-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func
// CHECK-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR2]] {
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8
-// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
-// CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8
-// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[DOTCNT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCNT_ADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr
+// CHECK-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr
+// CHECK-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31
// CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5
-// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 8
-// CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], align 8
+// CHECK-NEXT: store i32 0, ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK-NEXT: br label [[PRECOND:%.*]]
// CHECK: precond:
-// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4
+// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 2
// CHECK-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]]
// CHECK: body:
@@ -265,7 +283,7 @@ int main()
// CHECK: ifcont:
// CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]])
// CHECK-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
-// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4
+// CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4
// CHECK-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]]
// CHECK-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]]
// CHECK: then3:
@@ -280,7 +298,7 @@ int main()
// CHECK-NEXT: br label [[IFCONT5]]
// CHECK: ifcont5:
// CHECK-NEXT: [[TMP19:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR_ASCAST]], align 4
// CHECK-NEXT: br label [[PRECOND]]
// CHECK: exit:
// CHECK-NEXT: ret void
@@ -410,7 +428,7 @@ int main()
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[O_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !nonnull [[META7:![0-9]+]], !align [[META8:![0-9]+]]
// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l50.omp_outlined, ptr [[TMP0]])
// CHECK1-NEXT: ret void
//
@@ -427,7 +445,7 @@ int main()
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[O]], ptr [[O_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[O_ADDR]], align 8, !nonnull [[META7]], !align [[META8]]
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x %class.S2], ptr [[TMP0]], i64 0, i64 0
// CHECK1-NEXT: call void @_ZN2S2C1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[O1]])
// CHECK1-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
@@ -447,7 +465,7 @@ int main()
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[I]], align 4
-// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
// CHECK1: for.end:
// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK1-NEXT: store ptr [[O1]], ptr [[TMP8]], align 8
@@ -498,7 +516,7 @@ int main()
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META7]], !align [[META13:![0-9]+]]
// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l55.omp_outlined, ptr [[TMP0]])
// CHECK1-NEXT: ret void
//
@@ -524,7 +542,7 @@ int main()
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META7]], !align [[META13]]
// CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
// CHECK1-NEXT: store i64 9, ptr [[DOTOMP_UB]], align 8
// CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
@@ -1855,7 +1873,7 @@ int main()
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META14:![0-9]+]], !align [[META15:![0-9]+]]
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
@@ -1877,7 +1895,7 @@ int main()
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META14]], !align [[META15]]
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [10 x i32], ptr [[TMP0]], i32 0, i32 0
// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw [10 x i32], ptr [[TMP0]], i32 0, i32 1
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x i32], ptr [[A2]], i32 0, i32 0
@@ -1909,7 +1927,7 @@ int main()
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4
-// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
// CHECK2: for.end:
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[A2]], ptr [[TMP10]], align 4
@@ -1996,7 +2014,7 @@ int main()
// CHECK2-NEXT: [[SIZE_CASTED:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META14]], !align [[META15]]
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[SIZE_CASTED]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIZE_CASTED]], align 4
@@ -2018,7 +2036,7 @@ int main()
// CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4
// CHECK2-NEXT: store i32 [[SIZE]], ptr [[SIZE_ADDR]], align 4
// CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META14]], !align [[META15]]
// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 3
// CHECK2-NEXT: store i32 0, ptr [[A1]], align 4
// CHECK2-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
@@ -2039,7 +2057,7 @@ int main()
// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[I]], align 4
-// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]]
// CHECK2: for.end:
// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0
// CHECK2-NEXT: store ptr [[A1]], ptr [[TMP9]], align 4
diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp
index ec26cf2186285..d4566f4bddf88 100644
--- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp
@@ -65,657 +65,761 @@ int main() {
return 0;
}
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG29:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG20:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META48:![0-9]+]], !DIExpression(), [[META49:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META50:![0-9]+]], !DIExpression(), [[META51:![0-9]+]])
-// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META52:![0-9]+]], !DIExpression(), [[META53:![0-9]+]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META54:![0-9]+]], !DIExpression(), [[META55:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META56:![0-9]+]], !DIExpression(), [[META57:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG58:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG58]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG58]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG58]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG58]]
-// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG58]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG58]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG58]]
-// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG58]]
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG58]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG58]]
-// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG58]]
-// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG58]]
-// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG58]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META39:![0-9]+]], !DIExpression(), [[META40:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META41:![0-9]+]], !DIExpression(), [[META42:![0-9]+]])
+// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META43:![0-9]+]], !DIExpression(), [[META44:![0-9]+]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META45:![0-9]+]], !DIExpression(), [[META46:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META47:![0-9]+]], !DIExpression(), [[META48:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG49:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG49]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG49]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG49]], !nonnull [[META38:![0-9]+]], !align [[META50:![0-9]+]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG49]]
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG49]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG49]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG49]]
+// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG49]]
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG49]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG49]], !nonnull [[META38]]
+// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG49]]
+// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG49]]
+// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG49]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]), !dbg [[DBG59:![0-9]+]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG61:![0-9]+]]
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg [[DBG61]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG61]]
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG61]]
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG61]]
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG61]]
-// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG61]]
-// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG61]]
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG61]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG61]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG61]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG61]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG61]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG62:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG63:![0-9]+]]
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]), !dbg [[DBG51:![0-9]+]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG53:![0-9]+]]
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED_ASCAST]], align 4, !dbg [[DBG53]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED_ASCAST]], align 8, !dbg [[DBG53]]
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0, !dbg [[DBG53]]
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG53]]
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1, !dbg [[DBG53]]
+// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG53]]
+// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG53]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2, !dbg [[DBG53]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG53]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3, !dbg [[DBG53]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG53]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4), !dbg [[DBG53]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG54:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG55:![0-9]+]]
// CHECK1: worker.exit:
-// CHECK1-NEXT: ret void, !dbg [[DBG58]]
+// CHECK1-NEXT: ret void, !dbg [[DBG49]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG64:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG56:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META71:![0-9]+]], !DIExpression(), [[META72:![0-9]+]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META73:![0-9]+]], !DIExpression(), [[META72]])
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META74:![0-9]+]], !DIExpression(), [[META72]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META75:![0-9]+]], !DIExpression(), [[META72]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META76:![0-9]+]], !DIExpression(), [[META72]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG77:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG77]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG77]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG77]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG77]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG77]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG77]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG77]]
-// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG77]]
-// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG77]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr [[TMP6]], ptr addrspace(1) [[TMP9]]) #[[ATTR3:[0-9]+]], !dbg [[DBG77]]
-// CHECK1-NEXT: ret void, !dbg [[DBG77]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META63:![0-9]+]], !DIExpression(), [[META64:![0-9]+]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META65:![0-9]+]], !DIExpression(), [[META64]])
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META66:![0-9]+]], !DIExpression(), [[META64]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META67:![0-9]+]], !DIExpression(), [[META64]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META68:![0-9]+]], !DIExpression(), [[META64]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG69:![0-9]+]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG69]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG69]], !nonnull [[META38]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR_ASCAST]], align 8, !dbg [[DBG69]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG69]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG69]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG69]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG69]]
+// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG69]]
+// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG69]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr [[TMP6]], ptr addrspace(1) [[TMP9]]) #[[ATTR3:[0-9]+]], !dbg [[DBG69]]
+// CHECK1-NEXT: ret void, !dbg [[DBG69]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG78:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG70:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B3:%.*]] = alloca [10 x [10 x i32]], align 4
-// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META85:![0-9]+]], !DIExpression(), [[META86:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META87:![0-9]+]], !DIExpression(), [[META86]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META88:![0-9]+]], !DIExpression(), [[META89:![0-9]+]])
-// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META90:![0-9]+]], !DIExpression(), [[META91:![0-9]+]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META92:![0-9]+]], !DIExpression(), [[META93:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META94:![0-9]+]], !DIExpression(), [[META95:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG96:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG96]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG96]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG96]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG96]]
-// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG96]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG96]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG96]]
-// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG96]]
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG96]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG96]]
-// CHECK1-NEXT: #dbg_declare(ptr [[B3]], [[META97:![0-9]+]], !DIExpression(), [[META86]])
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B3]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG96]]
-// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META98:![0-9]+]], !DIExpression(), [[META101:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG102:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG102]]
-// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX4]], i64 0, i64 1, !dbg [[DBG102]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX5]], ptr [[F]], align 8, !dbg [[META101]]
-// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META103:![0-9]+]], !DIExpression(), [[META104:![0-9]+]])
-// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[META104]]
-// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META105:![0-9]+]], !DIExpression(), [[META106:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 1, !dbg [[DBG107:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG107]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[H]], align 8, !dbg [[META106]]
-// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META108:![0-9]+]], !DIExpression(), [[META109:![0-9]+]])
-// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META109]]
-// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG110:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG111:![0-9]+]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG112:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64, !dbg [[DBG111]]
-// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG111]]
-// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX9]], align 4, !dbg [[DBG113:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG114:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG114]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG115:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG114]]
-// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG114]]
-// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX13]], align 4, !dbg [[DBG116:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG117:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG117]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG118:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG117]]
-// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG117]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX17]], align 4, !dbg [[DBG117]]
-// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG119:![0-9]+]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG120:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG119]]
-// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG119]]
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG121:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG122:![0-9]+]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG123:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG122]]
-// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG122]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG122]]
-// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG124:![0-9]+]]
-// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP15]] to i1, !dbg [[DBG124]]
-// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG124]]
-// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP14]], !dbg [[DBG124]]
-// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG124]]
-// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG124]]
-// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[TMP7]], align 1, !dbg [[DBG124]]
-// CHECK1-NEXT: ret void, !dbg [[DBG125:![0-9]+]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B3:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5)
+// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[B3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B3]] to ptr
+// CHECK1-NEXT: [[F_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F]] to ptr
+// CHECK1-NEXT: [[G_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G]] to ptr
+// CHECK1-NEXT: [[H_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H]] to ptr
+// CHECK1-NEXT: [[D_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META77:![0-9]+]], !DIExpression(), [[META78:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META79:![0-9]+]], !DIExpression(), [[META78]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META80:![0-9]+]], !DIExpression(), [[META81:![0-9]+]])
+// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META82:![0-9]+]], !DIExpression(), [[META83:![0-9]+]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META84:![0-9]+]], !DIExpression(), [[META85:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META86:![0-9]+]], !DIExpression(), [[META87:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG88:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG88]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG88]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG88]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG88]]
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG88]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG88]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG88]]
+// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG88]]
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG88]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG88]], !nonnull [[META38]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B3]], [[META89:![0-9]+]], !DIExpression(), [[META78]])
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B3_ASCAST]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG88]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[F]], [[META90:![0-9]+]], !DIExpression(), [[META93:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG94:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG94]]
+// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX4]], i64 0, i64 1, !dbg [[DBG94]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX5]], ptr [[F_ASCAST]], align 8, !dbg [[META93]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[G]], [[META95:![0-9]+]], !DIExpression(), [[META96:![0-9]+]])
+// CHECK1-NEXT: store ptr [[A_ADDR_ASCAST]], ptr [[G_ASCAST]], align 8, !dbg [[META96]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[H]], [[META97:![0-9]+]], !DIExpression(), [[META98:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3_ASCAST]], i64 0, i64 1, !dbg [[DBG99:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG99]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[H_ASCAST]], align 8, !dbg [[META98]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[D]], [[META100:![0-9]+]], !DIExpression(), [[META101:![0-9]+]])
+// CHECK1-NEXT: store i32 15, ptr [[D_ASCAST]], align 4, !dbg [[META101]]
+// CHECK1-NEXT: store i32 5, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG102:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3_ASCAST]], i64 0, i64 0, !dbg [[DBG103:![0-9]+]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG104:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64, !dbg [[DBG103]]
+// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG103]]
+// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX9]], align 4, !dbg [[DBG105:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG106:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG106]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG107:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG106]]
+// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG106]]
+// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX13]], align 4, !dbg [[DBG108:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG109:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG109]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG110:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG109]]
+// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG109]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX17]], align 4, !dbg [[DBG109]]
+// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3_ASCAST]], i64 0, i64 0, !dbg [[DBG111:![0-9]+]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG112:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG111]]
+// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG111]]
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG113:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3_ASCAST]], i64 0, i64 0, !dbg [[DBG114:![0-9]+]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG115:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG114]]
+// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG114]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG114]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG116:![0-9]+]]
+// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP15]] to i1, !dbg [[DBG116]]
+// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG116]]
+// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP14]], !dbg [[DBG116]]
+// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG116]]
+// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG116]]
+// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[TMP7]], align 1, !dbg [[DBG116]]
+// CHECK1-NEXT: ret void, !dbg [[DBG117:![0-9]+]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG126:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG118:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META129:![0-9]+]], !DIExpression(), [[META130:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META131:![0-9]+]], !DIExpression(), [[META130]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META132:![0-9]+]], !DIExpression(), [[META130]])
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META133:![0-9]+]], !DIExpression(), [[META130]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META134:![0-9]+]], !DIExpression(), [[META130]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META135:![0-9]+]], !DIExpression(), [[META130]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG136:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG136]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG136]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG136]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG136]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG136]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG136]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG136]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG136]]
-// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG136]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG136]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR3]], !dbg [[DBG136]]
-// CHECK1-NEXT: ret void, !dbg [[DBG136]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META121:![0-9]+]], !DIExpression(), [[META122:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META123:![0-9]+]], !DIExpression(), [[META122]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META124:![0-9]+]], !DIExpression(), [[META122]])
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META125:![0-9]+]], !DIExpression(), [[META122]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META126:![0-9]+]], !DIExpression(), [[META122]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META127:![0-9]+]], !DIExpression(), [[META122]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG128:![0-9]+]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG128]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG128]], !nonnull [[META38]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !dbg [[DBG128]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8, !dbg [[DBG128]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG128]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG128]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG128]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG128]]
+// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG128]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG128]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR3]], !dbg [[DBG128]]
+// CHECK1-NEXT: ret void, !dbg [[DBG128]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG137:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG129:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META142:![0-9]+]], !DIExpression(), [[META143:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META144:![0-9]+]], !DIExpression(), [[META145:![0-9]+]])
-// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META146:![0-9]+]], !DIExpression(), [[META147:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META148:![0-9]+]], !DIExpression(), [[META149:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META150:![0-9]+]], !DIExpression(), [[META151:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG152:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG152]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG152]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG152]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG152]]
-// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG152]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG152]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG152]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG152]]
-// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG152]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG152]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG152]]
-// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG152]]
-// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG152]]
-// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG152]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META134:![0-9]+]], !DIExpression(), [[META135:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META136:![0-9]+]], !DIExpression(), [[META137:![0-9]+]])
+// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META138:![0-9]+]], !DIExpression(), [[META139:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META140:![0-9]+]], !DIExpression(), [[META141:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META142:![0-9]+]], !DIExpression(), [[META143:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG144:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG144]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG144]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG144]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG144]]
+// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG144]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG144]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG144]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG144]]
+// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG144]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG144]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG144]], !nonnull [[META38]]
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG144]]
+// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG144]]
+// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG144]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB7:[0-9]+]]), !dbg [[DBG153:![0-9]+]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG155:![0-9]+]]
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG155]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG155]]
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG155]]
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG155]]
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG155]]
-// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG155]]
-// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG155]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG155]]
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG155]]
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG155]]
-// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG155]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB7]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG155]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG156:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG157:![0-9]+]]
+// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB7:[0-9]+]]), !dbg [[DBG145:![0-9]+]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG147:![0-9]+]]
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED_ASCAST]], align 4, !dbg [[DBG147]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED_ASCAST]], align 8, !dbg [[DBG147]]
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0, !dbg [[DBG147]]
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG147]]
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1, !dbg [[DBG147]]
+// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG147]]
+// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG147]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2, !dbg [[DBG147]]
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG147]]
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3, !dbg [[DBG147]]
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG147]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB7]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4), !dbg [[DBG147]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG148:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG149:![0-9]+]]
// CHECK1: worker.exit:
-// CHECK1-NEXT: ret void, !dbg [[DBG152]]
+// CHECK1-NEXT: ret void, !dbg [[DBG144]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG158:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG150:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META159:![0-9]+]], !DIExpression(), [[META160:![0-9]+]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META161:![0-9]+]], !DIExpression(), [[META160]])
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META162:![0-9]+]], !DIExpression(), [[META160]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META163:![0-9]+]], !DIExpression(), [[META160]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META164:![0-9]+]], !DIExpression(), [[META160]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG165:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG165]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG165]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG165]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG165]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG165]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG165]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG165]]
-// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG165]]
-// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG165]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG165]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]]) #[[ATTR3]], !dbg [[DBG165]]
-// CHECK1-NEXT: ret void, !dbg [[DBG165]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META151:![0-9]+]], !DIExpression(), [[META152:![0-9]+]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META153:![0-9]+]], !DIExpression(), [[META152]])
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META154:![0-9]+]], !DIExpression(), [[META152]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META155:![0-9]+]], !DIExpression(), [[META152]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META156:![0-9]+]], !DIExpression(), [[META152]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG157:![0-9]+]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG157]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG157]], !nonnull [[META38]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR_ASCAST]], align 8, !dbg [[DBG157]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG157]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG157]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG157]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG157]]
+// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG157]]
+// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG157]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG157]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]]) #[[ATTR3]], !dbg [[DBG157]]
+// CHECK1-NEXT: ret void, !dbg [[DBG157]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG166:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG158:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META169:![0-9]+]], !DIExpression(), [[META170:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META171:![0-9]+]], !DIExpression(), [[META170]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META172:![0-9]+]], !DIExpression(), [[META173:![0-9]+]])
-// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META174:![0-9]+]], !DIExpression(), [[META175:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META176:![0-9]+]], !DIExpression(), [[META177:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META178:![0-9]+]], !DIExpression(), [[META179:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG180:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG180]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG180]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG180]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG180]]
-// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG180]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG180]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG180]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG180]]
-// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG180]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG180]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG180]]
-// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META181:![0-9]+]], !DIExpression(), [[META183:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG184:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG184]]
-// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG184]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[F]], align 8, !dbg [[META183]]
-// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META185:![0-9]+]], !DIExpression(), [[META186:![0-9]+]])
-// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[META186]]
-// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META187:![0-9]+]], !DIExpression(), [[META188:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG189:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG189]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX6]], ptr [[H]], align 8, !dbg [[META188]]
-// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META190:![0-9]+]], !DIExpression(), [[META191:![0-9]+]])
-// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META191]]
-// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG192:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG193:![0-9]+]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG194:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG193]]
-// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG193]]
-// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG195:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG196:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG196]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG197:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG196]]
-// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG196]]
-// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG198:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG199:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG199]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG200:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG199]]
-// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG199]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG199]]
-// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG201:![0-9]+]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG202:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG201]]
-// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG201]]
-// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX19]], align 4, !dbg [[DBG203:![0-9]+]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG204:![0-9]+]]
-// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP14]] to i1, !dbg [[DBG204]]
-// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG204]]
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG205:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG206:![0-9]+]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[F_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F]] to ptr
+// CHECK1-NEXT: [[G_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G]] to ptr
+// CHECK1-NEXT: [[H_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H]] to ptr
+// CHECK1-NEXT: [[D_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META161:![0-9]+]], !DIExpression(), [[META162:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META163:![0-9]+]], !DIExpression(), [[META162]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META164:![0-9]+]], !DIExpression(), [[META165:![0-9]+]])
+// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META166:![0-9]+]], !DIExpression(), [[META167:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META168:![0-9]+]], !DIExpression(), [[META169:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META170:![0-9]+]], !DIExpression(), [[META171:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG172:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG172]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG172]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG172]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG172]]
+// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG172]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG172]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG172]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG172]]
+// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG172]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG172]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG172]], !nonnull [[META38]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[F]], [[META173:![0-9]+]], !DIExpression(), [[META175:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG176:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG176]]
+// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG176]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[F_ASCAST]], align 8, !dbg [[META175]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[G]], [[META177:![0-9]+]], !DIExpression(), [[META178:![0-9]+]])
+// CHECK1-NEXT: store ptr [[A_ADDR_ASCAST]], ptr [[G_ASCAST]], align 8, !dbg [[META178]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[H]], [[META179:![0-9]+]], !DIExpression(), [[META180:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG181:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG181]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX6]], ptr [[H_ASCAST]], align 8, !dbg [[META180]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[D]], [[META182:![0-9]+]], !DIExpression(), [[META183:![0-9]+]])
+// CHECK1-NEXT: store i32 15, ptr [[D_ASCAST]], align 4, !dbg [[META183]]
+// CHECK1-NEXT: store i32 5, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG184:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG185:![0-9]+]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG186:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG185]]
+// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG185]]
+// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG187:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG188:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG188]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG189:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG188]]
+// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG188]]
+// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG190:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG191:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG191]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG192:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG191]]
+// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG191]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG191]]
+// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG193:![0-9]+]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG194:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG193]]
+// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG193]]
+// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX19]], align 4, !dbg [[DBG195:![0-9]+]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG196:![0-9]+]]
+// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP14]] to i1, !dbg [[DBG196]]
+// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG196]]
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[D_ASCAST]], align 4, !dbg [[DBG197:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG198:![0-9]+]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG207:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG199:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META208:![0-9]+]], !DIExpression(), [[META209:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META210:![0-9]+]], !DIExpression(), [[META209]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META211:![0-9]+]], !DIExpression(), [[META209]])
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META212:![0-9]+]], !DIExpression(), [[META209]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META213:![0-9]+]], !DIExpression(), [[META209]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META214:![0-9]+]], !DIExpression(), [[META209]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG215:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG215]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG215]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG215]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG215]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG215]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG215]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG215]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG215]]
-// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG215]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG215]]
-// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG215]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG215]]
-// CHECK1-NEXT: ret void, !dbg [[DBG215]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META200:![0-9]+]], !DIExpression(), [[META201:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META202:![0-9]+]], !DIExpression(), [[META201]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META203:![0-9]+]], !DIExpression(), [[META201]])
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META204:![0-9]+]], !DIExpression(), [[META201]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META205:![0-9]+]], !DIExpression(), [[META201]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META206:![0-9]+]], !DIExpression(), [[META201]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG207:![0-9]+]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG207]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG207]], !nonnull [[META38]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !dbg [[DBG207]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8, !dbg [[DBG207]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG207]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG207]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG207]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG207]]
+// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG207]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG207]]
+// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG207]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG207]]
+// CHECK1-NEXT: ret void, !dbg [[DBG207]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG216:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG208:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META221:![0-9]+]], !DIExpression(), [[META222:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META223:![0-9]+]], !DIExpression(), [[META224:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META225:![0-9]+]], !DIExpression(), [[META226:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META227:![0-9]+]], !DIExpression(), [[META228:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META229:![0-9]+]], !DIExpression(), [[META230:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG231:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG231]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG231]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG231]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG231]]
-// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG231]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG231]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG231]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG231]]
-// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG231]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG231]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG231]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG231]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG231]]
-// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG231]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG231]]
-// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG231]]
-// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG231]]
-// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG231]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META213:![0-9]+]], !DIExpression(), [[META214:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META215:![0-9]+]], !DIExpression(), [[META216:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META217:![0-9]+]], !DIExpression(), [[META218:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META219:![0-9]+]], !DIExpression(), [[META220:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META221:![0-9]+]], !DIExpression(), [[META222:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG223:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG223]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG223]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG223]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG223]]
+// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG223]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG223]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG223]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG223]]
+// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG223]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG223]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG223]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG223]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG223]]
+// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP3_ASCAST]], align 8, !dbg [[DBG223]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP3_ASCAST]], align 8, !dbg [[DBG223]], !nonnull [[META38]]
+// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG223]]
+// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG223]]
+// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG223]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB11:[0-9]+]]), !dbg [[DBG232:![0-9]+]]
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG234:![0-9]+]]
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG234]]
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG234]]
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG234]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG234]]
-// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG234]]
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG234]]
-// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG234]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB11]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG234]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG235:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG236:![0-9]+]]
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB11:[0-9]+]]), !dbg [[DBG224:![0-9]+]]
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0, !dbg [[DBG226:![0-9]+]]
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG226]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1, !dbg [[DBG226]]
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG226]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2, !dbg [[DBG226]]
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG226]]
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3, !dbg [[DBG226]]
+// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG226]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB11]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4), !dbg [[DBG226]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG227:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG228:![0-9]+]]
// CHECK1: worker.exit:
-// CHECK1-NEXT: ret void, !dbg [[DBG231]]
+// CHECK1-NEXT: ret void, !dbg [[DBG223]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG237:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG229:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META240:![0-9]+]], !DIExpression(), [[META241:![0-9]+]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META242:![0-9]+]], !DIExpression(), [[META241]])
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META243:![0-9]+]], !DIExpression(), [[META241]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META244:![0-9]+]], !DIExpression(), [[META241]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META245:![0-9]+]], !DIExpression(), [[META241]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG246:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG246]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG246]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG246]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG246]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG246]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG246]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG246]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG246]]
-// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG246]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG246]]
-// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG246]]
-// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG246]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__(ptr [[TMP4]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]]) #[[ATTR3]], !dbg [[DBG246]]
-// CHECK1-NEXT: ret void, !dbg [[DBG246]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META232:![0-9]+]], !DIExpression(), [[META233:![0-9]+]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META234:![0-9]+]], !DIExpression(), [[META233]])
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META235:![0-9]+]], !DIExpression(), [[META233]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META236:![0-9]+]], !DIExpression(), [[META233]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META237:![0-9]+]], !DIExpression(), [[META233]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG238:![0-9]+]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG238]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG238]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG238]], !nonnull [[META38]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR_ASCAST]], align 8, !dbg [[DBG238]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG238]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG238]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG238]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG238]]
+// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG238]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG238]]
+// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG238]]
+// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG238]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__(ptr [[TMP4]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]]) #[[ATTR3]], !dbg [[DBG238]]
+// CHECK1-NEXT: ret void, !dbg [[DBG238]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG247:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG239:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META250:![0-9]+]], !DIExpression(), [[META251:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META252:![0-9]+]], !DIExpression(), [[META251]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META253:![0-9]+]], !DIExpression(), [[META254:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META255:![0-9]+]], !DIExpression(), [[META256:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META257:![0-9]+]], !DIExpression(), [[META258:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META259:![0-9]+]], !DIExpression(), [[META260:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG261:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG261]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG261]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG261]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG261]]
-// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG261]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG261]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG261]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG261]]
-// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG261]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG261]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG261]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG261]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG261]]
-// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG261]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG261]]
-// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META262:![0-9]+]], !DIExpression(), [[META264:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG265:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG265]]
-// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX4]], i64 0, i64 1, !dbg [[DBG265]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX5]], ptr [[F]], align 8, !dbg [[META264]]
-// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META266:![0-9]+]], !DIExpression(), [[META267:![0-9]+]])
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[META267]]
-// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META268:![0-9]+]], !DIExpression(), [[META269:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 1, !dbg [[DBG270:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG270]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[H]], align 8, !dbg [[META269]]
-// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META271:![0-9]+]], !DIExpression(), [[META272:![0-9]+]])
-// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META272]]
-// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg [[DBG273:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG274:![0-9]+]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG275:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG274]]
-// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG274]]
-// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX9]], align 4, !dbg [[DBG276:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG277:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG277]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG278:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG277]]
-// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG277]]
-// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX13]], align 4, !dbg [[DBG279:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG280:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG280]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG281:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG280]]
-// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG280]]
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX17]], align 4, !dbg [[DBG280]]
-// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG282:![0-9]+]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG283:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG282]]
-// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG282]]
-// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG284:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG285:![0-9]+]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG286:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP17]] to i64, !dbg [[DBG285]]
-// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP18]], 0, !dbg [[DBG285]]
-// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG287:![0-9]+]]
-// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[TMP11]], align 1, !dbg [[DBG287]]
-// CHECK1-NEXT: ret void, !dbg [[DBG288:![0-9]+]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// CHECK1-NEXT: [[F_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F]] to ptr
+// CHECK1-NEXT: [[G_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G]] to ptr
+// CHECK1-NEXT: [[H_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H]] to ptr
+// CHECK1-NEXT: [[D_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META242:![0-9]+]], !DIExpression(), [[META243:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META244:![0-9]+]], !DIExpression(), [[META243]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META245:![0-9]+]], !DIExpression(), [[META246:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META247:![0-9]+]], !DIExpression(), [[META248:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META249:![0-9]+]], !DIExpression(), [[META250:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META251:![0-9]+]], !DIExpression(), [[META252:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG253:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG253]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG253]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG253]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG253]]
+// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG253]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG253]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG253]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG253]]
+// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG253]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG253]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG253]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG253]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG253]]
+// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP3_ASCAST]], align 8, !dbg [[DBG253]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP3_ASCAST]], align 8, !dbg [[DBG253]], !nonnull [[META38]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[F]], [[META254:![0-9]+]], !DIExpression(), [[META256:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG257:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG257]]
+// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX4]], i64 0, i64 1, !dbg [[DBG257]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX5]], ptr [[F_ASCAST]], align 8, !dbg [[META256]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[G]], [[META258:![0-9]+]], !DIExpression(), [[META259:![0-9]+]])
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G_ASCAST]], align 8, !dbg [[META259]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[H]], [[META260:![0-9]+]], !DIExpression(), [[META261:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 1, !dbg [[DBG262:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG262]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[H_ASCAST]], align 8, !dbg [[META261]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[D]], [[META263:![0-9]+]], !DIExpression(), [[META264:![0-9]+]])
+// CHECK1-NEXT: store i32 15, ptr [[D_ASCAST]], align 4, !dbg [[META264]]
+// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg [[DBG265:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG266:![0-9]+]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG267:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG266]]
+// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG266]]
+// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX9]], align 4, !dbg [[DBG268:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG269:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG269]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG270:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG269]]
+// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG269]]
+// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX13]], align 4, !dbg [[DBG271:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG272:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG272]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG273:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP14]] to i64, !dbg [[DBG272]]
+// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG272]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX17]], align 4, !dbg [[DBG272]]
+// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG274:![0-9]+]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG275:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG274]]
+// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG274]]
+// CHECK1-NEXT: store i32 [[TMP15]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG276:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG277:![0-9]+]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG278:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP17]] to i64, !dbg [[DBG277]]
+// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP18]], 0, !dbg [[DBG277]]
+// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG279:![0-9]+]]
+// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[TMP11]], align 1, !dbg [[DBG279]]
+// CHECK1-NEXT: ret void, !dbg [[DBG280:![0-9]+]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG289:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG281:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META292:![0-9]+]], !DIExpression(), [[META293:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META294:![0-9]+]], !DIExpression(), [[META293]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META295:![0-9]+]], !DIExpression(), [[META293]])
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META296:![0-9]+]], !DIExpression(), [[META293]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META297:![0-9]+]], !DIExpression(), [[META293]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META298:![0-9]+]], !DIExpression(), [[META293]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG299:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG299]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG299]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG299]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG299]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG299]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG299]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG299]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG299]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG299]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG299]]
-// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG299]]
-// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG299]]
-// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG299]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined_debug__(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG299]]
-// CHECK1-NEXT: ret void, !dbg [[DBG299]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META284:![0-9]+]], !DIExpression(), [[META285:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META286:![0-9]+]], !DIExpression(), [[META285]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META287:![0-9]+]], !DIExpression(), [[META285]])
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META288:![0-9]+]], !DIExpression(), [[META285]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META289:![0-9]+]], !DIExpression(), [[META285]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META290:![0-9]+]], !DIExpression(), [[META285]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG291:![0-9]+]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG291]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG291]], !nonnull [[META38]], !align [[META50]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG291]], !nonnull [[META38]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !dbg [[DBG291]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8, !dbg [[DBG291]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG291]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG291]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG291]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG291]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG291]]
+// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG291]]
+// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG291]]
+// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG291]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined_debug__(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG291]]
+// CHECK1-NEXT: ret void, !dbg [[DBG291]]
//
diff --git a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp
index 5addc4928918f..6e7ce8f153ae7 100644
--- a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp
@@ -55,888 +55,1015 @@ int main() {
return 0;
}
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]], i1 noundef zeroext [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG19:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]], i1 noundef zeroext [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG10:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i8, align 1
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META40:![0-9]+]], !DIExpression(), [[META41:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META42:![0-9]+]], !DIExpression(), [[META43:![0-9]+]])
-// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META44:![0-9]+]], !DIExpression(), [[META45:![0-9]+]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META46:![0-9]+]], !DIExpression(), [[META47:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META48:![0-9]+]], !DIExpression(), [[META49:![0-9]+]])
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META31:![0-9]+]], !DIExpression(), [[META32:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META33:![0-9]+]], !DIExpression(), [[META34:![0-9]+]])
+// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META35:![0-9]+]], !DIExpression(), [[META36:![0-9]+]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META37:![0-9]+]], !DIExpression(), [[META38:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META39:![0-9]+]], !DIExpression(), [[META40:![0-9]+]])
// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[DOTCAPTURE_EXPR_]] to i8
-// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTCAPTURE_EXPR__ADDR]], [[META50:![0-9]+]], !DIExpression(), [[META51:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG52:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG52]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG52]]
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG52]]
-// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG52]]
-// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG52]]
+// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]], [[META41:![0-9]+]], !DIExpression(), [[META42:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG43:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG43]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG43]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG43]], !nonnull [[META30:![0-9]+]], !align [[META44:![0-9]+]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG43]]
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG43]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG43]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG43]]
+// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG43]]
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG43]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG43]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG43]]
+// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG43]]
+// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG43]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG53:![0-9]+]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG55:![0-9]+]]
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg [[DBG55]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG55]]
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG55]]
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG55]]
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG55]]
-// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG55]]
-// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG55]]
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG55]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG55]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG55]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG55]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG56:![0-9]+]]
-// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG56]]
-// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG55]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG55]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG57:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG58:![0-9]+]]
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG45:![0-9]+]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG47:![0-9]+]]
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED_ASCAST]], align 4, !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED_ASCAST]], align 8, !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0, !dbg [[DBG47]]
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1, !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG47]]
+// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2, !dbg [[DBG47]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3, !dbg [[DBG47]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1, !dbg [[DBG48:![0-9]+]]
+// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG48]]
+// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG47]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4), !dbg [[DBG47]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG49:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG50:![0-9]+]]
// CHECK1: worker.exit:
-// CHECK1-NEXT: ret void, !dbg [[DBG52]]
+// CHECK1-NEXT: ret void, !dbg [[DBG43]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG59:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG51:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META66:![0-9]+]], !DIExpression(), [[META67:![0-9]+]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META68:![0-9]+]], !DIExpression(), [[META67]])
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META69:![0-9]+]], !DIExpression(), [[META67]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META70:![0-9]+]], !DIExpression(), [[META67]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META71:![0-9]+]], !DIExpression(), [[META67]])
-// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTCAPTURE_EXPR__ADDR]], [[META72:![0-9]+]], !DIExpression(), [[META67]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG73:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG73]]
-// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP8]] to i1, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG73]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP9]], i32 [[TMP5]], ptr [[TMP6]], ptr addrspace(1) [[TMP10]], i1 [[LOADEDV]]) #[[ATTR3:[0-9]+]], !dbg [[DBG73]]
-// CHECK1-NEXT: ret void, !dbg [[DBG73]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META58:![0-9]+]], !DIExpression(), [[META59:![0-9]+]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META60:![0-9]+]], !DIExpression(), [[META59]])
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META61:![0-9]+]], !DIExpression(), [[META59]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META62:![0-9]+]], !DIExpression(), [[META59]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META63:![0-9]+]], !DIExpression(), [[META59]])
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]], [[META64:![0-9]+]], !DIExpression(), [[META59]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG65:![0-9]+]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG65]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG65]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR_ASCAST]], align 8, !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1, !dbg [[DBG65]]
+// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP8]] to i1, !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG65]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP9]], i32 [[TMP5]], ptr [[TMP6]], ptr addrspace(1) [[TMP10]], i1 [[LOADEDV]]) #[[ATTR3:[0-9]+]], !dbg [[DBG65]]
+// CHECK1-NEXT: ret void, !dbg [[DBG65]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG74:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG66:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B4:%.*]] = alloca [10 x [10 x i32]], align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META81:![0-9]+]], !DIExpression(), [[META82:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META83:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META84:![0-9]+]], !DIExpression(), [[META85:![0-9]+]])
-// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META86:![0-9]+]], !DIExpression(), [[META87:![0-9]+]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META88:![0-9]+]], !DIExpression(), [[META89:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META90:![0-9]+]], !DIExpression(), [[META91:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG92:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG92]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG92]]
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META93:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META94:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG95:![0-9]+]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META96:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META97:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META98:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: #dbg_declare(ptr [[B4]], [[META99:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG92]]
-// CHECK1-NEXT: #dbg_declare(ptr [[I]], [[META100:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG92]]
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG101:![0-9]+]]
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG92]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B4:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[B4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B4]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[F_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F]] to ptr
+// CHECK1-NEXT: [[G_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G]] to ptr
+// CHECK1-NEXT: [[H_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H]] to ptr
+// CHECK1-NEXT: [[D_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META73:![0-9]+]], !DIExpression(), [[META74:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META75:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META76:![0-9]+]], !DIExpression(), [[META77:![0-9]+]])
+// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META78:![0-9]+]], !DIExpression(), [[META79:![0-9]+]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META80:![0-9]+]], !DIExpression(), [[META81:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META82:![0-9]+]], !DIExpression(), [[META83:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG84:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG84]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG84]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG84]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG84]]
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG84]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG84]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG84]]
+// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG84]]
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG84]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG84]], !nonnull [[META30]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_IV]], [[META85:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_LB]], [[META86:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG87:![0-9]+]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_UB]], [[META88:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_STRIDE]], [[META89:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_IS_LAST]], [[META90:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B4]], [[META91:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4_ASCAST]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG84]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[I]], [[META92:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !dbg [[DBG84]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG84]]
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1), !dbg [[DBG93:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG84]]
// CHECK1: omp.dispatch.cond:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9, !dbg [[DBG95]]
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG95]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9, !dbg [[DBG87]]
+// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG87]]
// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG95]]
+// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG87]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG95]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG87]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG95]]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG92]]
-// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG92]]
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG87]]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG84]]
+// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG84]]
// CHECK1: omp.dispatch.body:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG92]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG84]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG92]]
-// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG92]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG84]]
+// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG84]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1, !dbg [[DBG102:![0-9]+]]
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG102]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG102]]
-// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META103:![0-9]+]], !DIExpression(), [[META106:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG107:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG107]]
-// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG107]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[META106]]
-// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META108:![0-9]+]], !DIExpression(), [[META109:![0-9]+]])
-// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[META109]]
-// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META110:![0-9]+]], !DIExpression(), [[META111:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 1, !dbg [[DBG112:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG112]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[META111]]
-// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META113:![0-9]+]], !DIExpression(), [[META114:![0-9]+]])
-// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META114]]
-// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG115:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG116:![0-9]+]]
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG117:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG116]]
-// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG116]]
-// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG118:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG119:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG119]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG120:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG119]]
-// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG119]]
-// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG121:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG122:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG122]]
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG123:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG122]]
-// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG122]]
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG122]]
-// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG124:![0-9]+]]
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG125:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG124]]
-// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG124]]
-// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG126:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG127:![0-9]+]]
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG128:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG127]]
-// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG127]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG127]]
-// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG129:![0-9]+]]
-// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP25]] to i1, !dbg [[DBG129]]
-// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG129]]
-// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP24]], !dbg [[DBG129]]
-// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG129]]
-// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG129]]
-// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[TMP7]], align 1, !dbg [[DBG129]]
-// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG130:![0-9]+]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1, !dbg [[DBG94:![0-9]+]]
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG94]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !dbg [[DBG94]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[F]], [[META95:![0-9]+]], !DIExpression(), [[META98:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG99:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG99]]
+// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG99]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F_ASCAST]], align 8, !dbg [[META98]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[G]], [[META100:![0-9]+]], !DIExpression(), [[META101:![0-9]+]])
+// CHECK1-NEXT: store ptr [[A_ADDR_ASCAST]], ptr [[G_ASCAST]], align 8, !dbg [[META101]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[H]], [[META102:![0-9]+]], !DIExpression(), [[META103:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4_ASCAST]], i64 0, i64 1, !dbg [[DBG104:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG104]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H_ASCAST]], align 8, !dbg [[META103]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[D]], [[META105:![0-9]+]], !DIExpression(), [[META106:![0-9]+]])
+// CHECK1-NEXT: store i32 15, ptr [[D_ASCAST]], align 4, !dbg [[META106]]
+// CHECK1-NEXT: store i32 5, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG107:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4_ASCAST]], i64 0, i64 0, !dbg [[DBG108:![0-9]+]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG109:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG108]]
+// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG108]]
+// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG110:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG111:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG111]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG112:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG111]]
+// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG111]]
+// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG113:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG114:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG114]]
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG115:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG114]]
+// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG114]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG114]]
+// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4_ASCAST]], i64 0, i64 0, !dbg [[DBG116:![0-9]+]]
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG117:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG116]]
+// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG116]]
+// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG118:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4_ASCAST]], i64 0, i64 0, !dbg [[DBG119:![0-9]+]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG120:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG119]]
+// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG119]]
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG119]]
+// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG121:![0-9]+]]
+// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP25]] to i1, !dbg [[DBG121]]
+// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG121]]
+// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP24]], !dbg [[DBG121]]
+// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG121]]
+// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG121]]
+// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[TMP7]], align 1, !dbg [[DBG121]]
+// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG122:![0-9]+]]
// CHECK1: omp.body.continue:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG101]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG93]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG92]]
-// CHECK1-NEXT: store i32 [[ADD27]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG92]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG101]], !llvm.loop [[LOOP131:![0-9]+]]
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG84]]
+// CHECK1-NEXT: store i32 [[ADD27]], ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG84]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG93]], !llvm.loop [[LOOP123:![0-9]+]]
// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG101]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG93]]
// CHECK1: omp.dispatch.inc:
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG92]]
-// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG92]]
-// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG92]]
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG101]], !llvm.loop [[LOOP133:![0-9]+]]
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG84]]
+// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG84]]
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG84]]
+// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG84]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG93]], !llvm.loop [[LOOP125:![0-9]+]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG132:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG134:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG124:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG126:![0-9]+]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG135:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG127:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META138:![0-9]+]], !DIExpression(), [[META139:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META140:![0-9]+]], !DIExpression(), [[META139]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META141:![0-9]+]], !DIExpression(), [[META139]])
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META142:![0-9]+]], !DIExpression(), [[META139]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META143:![0-9]+]], !DIExpression(), [[META139]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META144:![0-9]+]], !DIExpression(), [[META139]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG145:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG145]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR3]], !dbg [[DBG145]]
-// CHECK1-NEXT: ret void, !dbg [[DBG145]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META130:![0-9]+]], !DIExpression(), [[META131:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META132:![0-9]+]], !DIExpression(), [[META131]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META133:![0-9]+]], !DIExpression(), [[META131]])
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META134:![0-9]+]], !DIExpression(), [[META131]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META135:![0-9]+]], !DIExpression(), [[META131]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META136:![0-9]+]], !DIExpression(), [[META131]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG137:![0-9]+]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG137]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG137]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8, !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG137]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR3]], !dbg [[DBG137]]
+// CHECK1-NEXT: ret void, !dbg [[DBG137]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG146:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG138:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META151:![0-9]+]], !DIExpression(), [[META152:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META153:![0-9]+]], !DIExpression(), [[META154:![0-9]+]])
-// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META155:![0-9]+]], !DIExpression(), [[META156:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META157:![0-9]+]], !DIExpression(), [[META158:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META159:![0-9]+]], !DIExpression(), [[META160:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG161:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG161]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG161]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG161]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG161]]
-// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG161]]
-// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG161]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META143:![0-9]+]], !DIExpression(), [[META144:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META145:![0-9]+]], !DIExpression(), [[META146:![0-9]+]])
+// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META147:![0-9]+]], !DIExpression(), [[META148:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META149:![0-9]+]], !DIExpression(), [[META150:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META151:![0-9]+]], !DIExpression(), [[META152:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG153:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG153]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG153]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG153]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG153]]
+// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG153]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG153]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG153]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG153]]
+// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG153]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG153]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG153]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG153]]
+// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG153]]
+// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG153]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG162:![0-9]+]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG164:![0-9]+]]
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG164]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG164]]
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG164]]
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG164]]
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG164]]
-// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG164]]
-// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG164]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG164]]
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG164]]
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG164]]
-// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG164]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB13]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG164]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG165:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG166:![0-9]+]]
+// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG154:![0-9]+]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG156:![0-9]+]]
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED_ASCAST]], align 4, !dbg [[DBG156]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED_ASCAST]], align 8, !dbg [[DBG156]]
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0, !dbg [[DBG156]]
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG156]]
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1, !dbg [[DBG156]]
+// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG156]]
+// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG156]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2, !dbg [[DBG156]]
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG156]]
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3, !dbg [[DBG156]]
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG156]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB13]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4), !dbg [[DBG156]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG157:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG158:![0-9]+]]
// CHECK1: worker.exit:
-// CHECK1-NEXT: ret void, !dbg [[DBG161]]
+// CHECK1-NEXT: ret void, !dbg [[DBG153]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG167:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG159:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META170:![0-9]+]], !DIExpression(), [[META171:![0-9]+]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META172:![0-9]+]], !DIExpression(), [[META171]])
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META173:![0-9]+]], !DIExpression(), [[META171]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META174:![0-9]+]], !DIExpression(), [[META171]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META175:![0-9]+]], !DIExpression(), [[META171]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG176:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG176]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]]) #[[ATTR3]], !dbg [[DBG176]]
-// CHECK1-NEXT: ret void, !dbg [[DBG176]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META162:![0-9]+]], !DIExpression(), [[META163:![0-9]+]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META164:![0-9]+]], !DIExpression(), [[META163]])
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META165:![0-9]+]], !DIExpression(), [[META163]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META166:![0-9]+]], !DIExpression(), [[META163]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META167:![0-9]+]], !DIExpression(), [[META163]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG168:![0-9]+]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG168]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG168]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR_ASCAST]], align 8, !dbg [[DBG168]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG168]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG168]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG168]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG168]]
+// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG168]]
+// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG168]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG168]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]]) #[[ATTR3]], !dbg [[DBG168]]
+// CHECK1-NEXT: ret void, !dbg [[DBG168]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG177:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG169:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META180:![0-9]+]], !DIExpression(), [[META181:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META182:![0-9]+]], !DIExpression(), [[META181]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META183:![0-9]+]], !DIExpression(), [[META184:![0-9]+]])
-// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META185:![0-9]+]], !DIExpression(), [[META186:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META187:![0-9]+]], !DIExpression(), [[META188:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META189:![0-9]+]], !DIExpression(), [[META190:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG191:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG191]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG191]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG191]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META192:![0-9]+]], !DIExpression(), [[META181]])
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META193:![0-9]+]], !DIExpression(), [[META181]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG194:![0-9]+]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META195:![0-9]+]], !DIExpression(), [[META181]])
-// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META196:![0-9]+]], !DIExpression(), [[META181]])
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META197:![0-9]+]], !DIExpression(), [[META181]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: #dbg_declare(ptr [[I]], [[META198:![0-9]+]], !DIExpression(), [[META181]])
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG191]]
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB10:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG199:![0-9]+]]
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG191]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[F_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F]] to ptr
+// CHECK1-NEXT: [[G_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G]] to ptr
+// CHECK1-NEXT: [[H_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H]] to ptr
+// CHECK1-NEXT: [[D_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META172:![0-9]+]], !DIExpression(), [[META173:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META174:![0-9]+]], !DIExpression(), [[META173]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META175:![0-9]+]], !DIExpression(), [[META176:![0-9]+]])
+// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META177:![0-9]+]], !DIExpression(), [[META178:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META179:![0-9]+]], !DIExpression(), [[META180:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META181:![0-9]+]], !DIExpression(), [[META182:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG183:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG183]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG183]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG183]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG183]]
+// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG183]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG183]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG183]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG183]]
+// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG183]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG183]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG183]], !nonnull [[META30]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_IV]], [[META184:![0-9]+]], !DIExpression(), [[META173]])
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_LB]], [[META185:![0-9]+]], !DIExpression(), [[META173]])
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG186:![0-9]+]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_UB]], [[META187:![0-9]+]], !DIExpression(), [[META173]])
+// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_STRIDE]], [[META188:![0-9]+]], !DIExpression(), [[META173]])
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_IS_LAST]], [[META189:![0-9]+]], !DIExpression(), [[META173]])
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[I]], [[META190:![0-9]+]], !DIExpression(), [[META173]])
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !dbg [[DBG183]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG183]]
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB10:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1), !dbg [[DBG191:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG183]]
// CHECK1: omp.dispatch.cond:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG194]]
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG194]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG186]]
+// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG186]]
// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG194]]
+// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG186]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG194]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG186]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG194]]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG191]]
-// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG191]]
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG186]]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG183]]
+// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG183]]
// CHECK1: omp.dispatch.body:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG191]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG183]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG191]]
-// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG191]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG183]]
+// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG183]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG200:![0-9]+]]
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG200]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG200]]
-// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META201:![0-9]+]], !DIExpression(), [[META203:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG204:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG204]]
-// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG204]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[F]], align 8, !dbg [[META203]]
-// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META205:![0-9]+]], !DIExpression(), [[META206:![0-9]+]])
-// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[META206]]
-// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META207:![0-9]+]], !DIExpression(), [[META208:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG209:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 1, !dbg [[DBG209]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[H]], align 8, !dbg [[META208]]
-// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META210:![0-9]+]], !DIExpression(), [[META211:![0-9]+]])
-// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META211]]
-// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG212:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG213:![0-9]+]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG214:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG213]]
-// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM]], !dbg [[DBG213]]
-// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX11]], align 4, !dbg [[DBG215:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG216:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX12]], i64 0, i64 0, !dbg [[DBG216]]
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG217:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG216]]
-// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX13]], i64 0, i64 [[IDXPROM14]], !dbg [[DBG216]]
-// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX15]], align 4, !dbg [[DBG218:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG219:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX16]], i64 0, i64 0, !dbg [[DBG219]]
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG220:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG219]]
-// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG219]]
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !dbg [[DBG219]]
-// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG221:![0-9]+]]
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG222:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG221]]
-// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG221]]
-// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX22]], align 4, !dbg [[DBG223:![0-9]+]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG224:![0-9]+]]
-// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG224]]
-// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG224]]
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG225:![0-9]+]]
-// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG226:![0-9]+]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG192:![0-9]+]]
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG192]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !dbg [[DBG192]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[F]], [[META193:![0-9]+]], !DIExpression(), [[META195:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG196:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG196]]
+// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG196]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[F_ASCAST]], align 8, !dbg [[META195]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[G]], [[META197:![0-9]+]], !DIExpression(), [[META198:![0-9]+]])
+// CHECK1-NEXT: store ptr [[A_ADDR_ASCAST]], ptr [[G_ASCAST]], align 8, !dbg [[META198]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[H]], [[META199:![0-9]+]], !DIExpression(), [[META200:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG201:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 1, !dbg [[DBG201]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[H_ASCAST]], align 8, !dbg [[META200]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[D]], [[META202:![0-9]+]], !DIExpression(), [[META203:![0-9]+]])
+// CHECK1-NEXT: store i32 15, ptr [[D_ASCAST]], align 4, !dbg [[META203]]
+// CHECK1-NEXT: store i32 5, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG204:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG205:![0-9]+]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG206:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG205]]
+// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM]], !dbg [[DBG205]]
+// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX11]], align 4, !dbg [[DBG207:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG208:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX12]], i64 0, i64 0, !dbg [[DBG208]]
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG209:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG208]]
+// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX13]], i64 0, i64 [[IDXPROM14]], !dbg [[DBG208]]
+// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX15]], align 4, !dbg [[DBG210:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG211:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX16]], i64 0, i64 0, !dbg [[DBG211]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG212:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG211]]
+// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG211]]
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !dbg [[DBG211]]
+// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG213:![0-9]+]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG214:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG213]]
+// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG213]]
+// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX22]], align 4, !dbg [[DBG215:![0-9]+]]
+// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG216:![0-9]+]]
+// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG216]]
+// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG216]]
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[D_ASCAST]], align 4, !dbg [[DBG217:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG218:![0-9]+]]
// CHECK1: omp.body.continue:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG199]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG191]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG191]]
-// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG191]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG199]], !llvm.loop [[LOOP227:![0-9]+]]
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG183]]
+// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG183]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG191]], !llvm.loop [[LOOP219:![0-9]+]]
// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG199]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG191]]
// CHECK1: omp.dispatch.inc:
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG191]]
-// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG191]]
-// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG191]]
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG199]], !llvm.loop [[LOOP229:![0-9]+]]
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG183]]
+// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG183]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG183]]
+// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG183]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG191]], !llvm.loop [[LOOP221:![0-9]+]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG228:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG230:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG220:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG222:![0-9]+]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG231:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG223:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META232:![0-9]+]], !DIExpression(), [[META233:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META234:![0-9]+]], !DIExpression(), [[META233]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META235:![0-9]+]], !DIExpression(), [[META233]])
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META236:![0-9]+]], !DIExpression(), [[META233]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META237:![0-9]+]], !DIExpression(), [[META233]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META238:![0-9]+]], !DIExpression(), [[META233]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG239:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG239]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG239]]
-// CHECK1-NEXT: ret void, !dbg [[DBG239]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META224:![0-9]+]], !DIExpression(), [[META225:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META226:![0-9]+]], !DIExpression(), [[META225]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META227:![0-9]+]], !DIExpression(), [[META225]])
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META228:![0-9]+]], !DIExpression(), [[META225]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META229:![0-9]+]], !DIExpression(), [[META225]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META230:![0-9]+]], !DIExpression(), [[META225]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG231:![0-9]+]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG231]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG231]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8, !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG231]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG231]]
+// CHECK1-NEXT: ret void, !dbg [[DBG231]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG240:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG232:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META245:![0-9]+]], !DIExpression(), [[META246:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META247:![0-9]+]], !DIExpression(), [[META248:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META249:![0-9]+]], !DIExpression(), [[META250:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META251:![0-9]+]], !DIExpression(), [[META252:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META253:![0-9]+]], !DIExpression(), [[META254:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG255:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG255]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG255]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG255]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG255]]
-// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG255]]
-// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG255]]
-// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG255]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META237:![0-9]+]], !DIExpression(), [[META238:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META239:![0-9]+]], !DIExpression(), [[META240:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META241:![0-9]+]], !DIExpression(), [[META242:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META243:![0-9]+]], !DIExpression(), [[META244:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META245:![0-9]+]], !DIExpression(), [[META246:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG247:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG247]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG247]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG247]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG247]]
+// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG247]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG247]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG247]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG247]]
+// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG247]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG247]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG247]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG247]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG247]]
+// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP3_ASCAST]], align 8, !dbg [[DBG247]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP3_ASCAST]], align 8, !dbg [[DBG247]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG247]]
+// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG247]]
+// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG247]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG256:![0-9]+]]
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG258:![0-9]+]]
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG258]]
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG258]]
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG258]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG258]]
-// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG258]]
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG258]]
-// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG258]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB20]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG258]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG259:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG260:![0-9]+]]
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG248:![0-9]+]]
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0, !dbg [[DBG250:![0-9]+]]
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG250]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1, !dbg [[DBG250]]
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG250]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2, !dbg [[DBG250]]
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG250]]
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3, !dbg [[DBG250]]
+// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG250]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB20]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4), !dbg [[DBG250]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG251:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG252:![0-9]+]]
// CHECK1: worker.exit:
-// CHECK1-NEXT: ret void, !dbg [[DBG255]]
+// CHECK1-NEXT: ret void, !dbg [[DBG247]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG261:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG253:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META264:![0-9]+]], !DIExpression(), [[META265:![0-9]+]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META266:![0-9]+]], !DIExpression(), [[META265]])
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META267:![0-9]+]], !DIExpression(), [[META265]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META268:![0-9]+]], !DIExpression(), [[META265]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META269:![0-9]+]], !DIExpression(), [[META265]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG270:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG270]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__(ptr [[TMP4]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]]) #[[ATTR3]], !dbg [[DBG270]]
-// CHECK1-NEXT: ret void, !dbg [[DBG270]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META256:![0-9]+]], !DIExpression(), [[META257:![0-9]+]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META258:![0-9]+]], !DIExpression(), [[META257]])
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META259:![0-9]+]], !DIExpression(), [[META257]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META260:![0-9]+]], !DIExpression(), [[META257]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META261:![0-9]+]], !DIExpression(), [[META257]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG262:![0-9]+]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG262]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG262]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG262]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR_ASCAST]], align 8, !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG262]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__(ptr [[TMP4]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]]) #[[ATTR3]], !dbg [[DBG262]]
+// CHECK1-NEXT: ret void, !dbg [[DBG262]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG271:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG263:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META274:![0-9]+]], !DIExpression(), [[META275:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META276:![0-9]+]], !DIExpression(), [[META275]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META277:![0-9]+]], !DIExpression(), [[META278:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META279:![0-9]+]], !DIExpression(), [[META280:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META281:![0-9]+]], !DIExpression(), [[META282:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META283:![0-9]+]], !DIExpression(), [[META284:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG285:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG285]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG285]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG285]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG285]]
-// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META286:![0-9]+]], !DIExpression(), [[META275]])
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META287:![0-9]+]], !DIExpression(), [[META275]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG288:![0-9]+]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META289:![0-9]+]], !DIExpression(), [[META275]])
-// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META290:![0-9]+]], !DIExpression(), [[META275]])
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META291:![0-9]+]], !DIExpression(), [[META275]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: #dbg_declare(ptr [[I]], [[META292:![0-9]+]], !DIExpression(), [[META275]])
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG285]]
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB17:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG293:![0-9]+]]
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG285]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP4]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[F_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F]] to ptr
+// CHECK1-NEXT: [[G_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G]] to ptr
+// CHECK1-NEXT: [[H_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H]] to ptr
+// CHECK1-NEXT: [[D_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META266:![0-9]+]], !DIExpression(), [[META267:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META268:![0-9]+]], !DIExpression(), [[META267]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META269:![0-9]+]], !DIExpression(), [[META270:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META271:![0-9]+]], !DIExpression(), [[META272:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META273:![0-9]+]], !DIExpression(), [[META274:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META275:![0-9]+]], !DIExpression(), [[META276:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG277:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG277]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG277]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG277]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG277]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG277]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG277]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG277]]
+// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP3_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP3_ASCAST]], align 8, !dbg [[DBG277]], !nonnull [[META30]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_IV]], [[META278:![0-9]+]], !DIExpression(), [[META267]])
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_LB]], [[META279:![0-9]+]], !DIExpression(), [[META267]])
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG280:![0-9]+]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_UB]], [[META281:![0-9]+]], !DIExpression(), [[META267]])
+// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_STRIDE]], [[META282:![0-9]+]], !DIExpression(), [[META267]])
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_IS_LAST]], [[META283:![0-9]+]], !DIExpression(), [[META267]])
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[I]], [[META284:![0-9]+]], !DIExpression(), [[META267]])
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG277]]
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB17:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1), !dbg [[DBG285:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG277]]
// CHECK1: omp.dispatch.cond:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9, !dbg [[DBG288]]
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG288]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9, !dbg [[DBG280]]
+// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG280]]
// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG288]]
+// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG280]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG288]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG280]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ], !dbg [[DBG288]]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]], !dbg [[DBG285]]
-// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG285]]
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ], !dbg [[DBG280]]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]], !dbg [[DBG277]]
+// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG277]]
// CHECK1: omp.dispatch.body:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG285]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG277]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]], !dbg [[DBG285]]
-// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG285]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]], !dbg [[DBG277]]
+// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG277]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1, !dbg [[DBG294:![0-9]+]]
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG294]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG294]]
-// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META295:![0-9]+]], !DIExpression(), [[META297:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG298:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG298]]
-// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG298]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[META297]]
-// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META299:![0-9]+]], !DIExpression(), [[META300:![0-9]+]])
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[META300]]
-// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META301:![0-9]+]], !DIExpression(), [[META302:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 1, !dbg [[DBG303:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG303]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[META302]]
-// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META304:![0-9]+]], !DIExpression(), [[META305:![0-9]+]])
-// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META305]]
-// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg [[DBG306:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG307:![0-9]+]]
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG308:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG307]]
-// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG307]]
-// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG309:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG310:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG310]]
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG311:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG310]]
-// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG310]]
-// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG312:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG313:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG313]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG314:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG313]]
-// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG313]]
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG313]]
-// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG315:![0-9]+]]
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG316:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP26]] to i64, !dbg [[DBG315]]
-// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG315]]
-// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG317:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG318:![0-9]+]]
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG319:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG318]]
-// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG318]]
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG318]]
-// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0, !dbg [[DBG318]]
-// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG320:![0-9]+]]
-// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[TMP11]], align 1, !dbg [[DBG320]]
-// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG321:![0-9]+]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1, !dbg [[DBG286:![0-9]+]]
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG286]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !dbg [[DBG286]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[F]], [[META287:![0-9]+]], !DIExpression(), [[META289:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG290:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG290]]
+// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG290]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F_ASCAST]], align 8, !dbg [[META289]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[G]], [[META291:![0-9]+]], !DIExpression(), [[META292:![0-9]+]])
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G_ASCAST]], align 8, !dbg [[META292]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[H]], [[META293:![0-9]+]], !DIExpression(), [[META294:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 1, !dbg [[DBG295:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG295]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H_ASCAST]], align 8, !dbg [[META294]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[D]], [[META296:![0-9]+]], !DIExpression(), [[META297:![0-9]+]])
+// CHECK1-NEXT: store i32 15, ptr [[D_ASCAST]], align 4, !dbg [[META297]]
+// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg [[DBG298:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG299:![0-9]+]]
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG300:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG299]]
+// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG299]]
+// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG301:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG302:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG302]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG303:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG302]]
+// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG302]]
+// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG304:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG305:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG305]]
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG306:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG305]]
+// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG305]]
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG305]]
+// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG307:![0-9]+]]
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG308:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP26]] to i64, !dbg [[DBG307]]
+// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG307]]
+// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG309:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG310:![0-9]+]]
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG311:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG310]]
+// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG310]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG310]]
+// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0, !dbg [[DBG310]]
+// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG312:![0-9]+]]
+// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[TMP11]], align 1, !dbg [[DBG312]]
+// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG313:![0-9]+]]
// CHECK1: omp.body.continue:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG293]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG285]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP29]], 1, !dbg [[DBG285]]
-// CHECK1-NEXT: store i32 [[ADD27]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG285]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG293]], !llvm.loop [[LOOP322:![0-9]+]]
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP29]], 1, !dbg [[DBG277]]
+// CHECK1-NEXT: store i32 [[ADD27]], ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG277]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG285]], !llvm.loop [[LOOP314:![0-9]+]]
// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG293]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG285]]
// CHECK1: omp.dispatch.inc:
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]], !dbg [[DBG285]]
-// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP32]], [[TMP33]], !dbg [[DBG285]]
-// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG285]]
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG293]], !llvm.loop [[LOOP324:![0-9]+]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]], !dbg [[DBG277]]
+// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP32]], [[TMP33]], !dbg [[DBG277]]
+// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG277]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG285]], !llvm.loop [[LOOP316:![0-9]+]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB19:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG323:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG325:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB19:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG315:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG317:![0-9]+]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG326:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG318:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META329:![0-9]+]], !DIExpression(), [[META330:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META331:![0-9]+]], !DIExpression(), [[META330]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META332:![0-9]+]], !DIExpression(), [[META330]])
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META333:![0-9]+]], !DIExpression(), [[META330]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META334:![0-9]+]], !DIExpression(), [[META330]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META335:![0-9]+]], !DIExpression(), [[META330]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG336:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG336]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG336]]
-// CHECK1-NEXT: ret void, !dbg [[DBG336]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META321:![0-9]+]], !DIExpression(), [[META322:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META323:![0-9]+]], !DIExpression(), [[META322]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META324:![0-9]+]], !DIExpression(), [[META322]])
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META325:![0-9]+]], !DIExpression(), [[META322]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META326:![0-9]+]], !DIExpression(), [[META322]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META327:![0-9]+]], !DIExpression(), [[META322]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG328:![0-9]+]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG328]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG328]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG328]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8, !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG328]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG328]]
+// CHECK1-NEXT: ret void, !dbg [[DBG328]]
//
diff --git a/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp b/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp
index 26a01c75f951f..47d2c0c671d70 100644
--- a/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp
+++ b/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp
@@ -55,888 +55,1015 @@ int main() {
return 0;
}
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]], i1 noundef zeroext [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG19:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]], i1 noundef zeroext [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG10:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i8, align 1
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META40:![0-9]+]], !DIExpression(), [[META41:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META42:![0-9]+]], !DIExpression(), [[META43:![0-9]+]])
-// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META44:![0-9]+]], !DIExpression(), [[META45:![0-9]+]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META46:![0-9]+]], !DIExpression(), [[META47:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META48:![0-9]+]], !DIExpression(), [[META49:![0-9]+]])
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i8, align 1, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META31:![0-9]+]], !DIExpression(), [[META32:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META33:![0-9]+]], !DIExpression(), [[META34:![0-9]+]])
+// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META35:![0-9]+]], !DIExpression(), [[META36:![0-9]+]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META37:![0-9]+]], !DIExpression(), [[META38:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META39:![0-9]+]], !DIExpression(), [[META40:![0-9]+]])
// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[DOTCAPTURE_EXPR_]] to i8
-// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 1
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTCAPTURE_EXPR__ADDR]], [[META50:![0-9]+]], !DIExpression(), [[META51:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG52:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG52]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG52]]
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG52]]
-// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG52]]
-// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG52]]
-// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG52]]
+// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]], [[META41:![0-9]+]], !DIExpression(), [[META42:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG43:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG43]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG43]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG43]], !nonnull [[META30:![0-9]+]], !align [[META44:![0-9]+]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG43]]
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG43]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG43]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG43]]
+// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG43]]
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG43]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG43]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG43]]
+// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG43]]
+// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG43]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG53:![0-9]+]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG55:![0-9]+]]
-// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg [[DBG55]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG55]]
-// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG55]]
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG55]]
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG55]]
-// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG55]]
-// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG55]]
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG55]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG55]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG55]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG55]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG56:![0-9]+]]
-// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG56]]
-// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG55]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG55]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG57:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG58:![0-9]+]]
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG45:![0-9]+]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG47:![0-9]+]]
+// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED_ASCAST]], align 4, !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED_ASCAST]], align 8, !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0, !dbg [[DBG47]]
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1, !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG47]]
+// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2, !dbg [[DBG47]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3, !dbg [[DBG47]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG47]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1, !dbg [[DBG48:![0-9]+]]
+// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG48]]
+// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG47]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4), !dbg [[DBG47]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG49:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG50:![0-9]+]]
// CHECK1: worker.exit:
-// CHECK1-NEXT: ret void, !dbg [[DBG52]]
+// CHECK1-NEXT: ret void, !dbg [[DBG43]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG59:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG51:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META66:![0-9]+]], !DIExpression(), [[META67:![0-9]+]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META68:![0-9]+]], !DIExpression(), [[META67]])
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META69:![0-9]+]], !DIExpression(), [[META67]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META70:![0-9]+]], !DIExpression(), [[META67]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META71:![0-9]+]], !DIExpression(), [[META67]])
-// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTCAPTURE_EXPR__ADDR]], [[META72:![0-9]+]], !DIExpression(), [[META67]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG73:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG73]]
-// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP8]] to i1, !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG73]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG73]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP9]], i32 [[TMP5]], ptr [[TMP6]], ptr addrspace(1) [[TMP10]], i1 [[LOADEDV]]) #[[ATTR3:[0-9]+]], !dbg [[DBG73]]
-// CHECK1-NEXT: ret void, !dbg [[DBG73]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META58:![0-9]+]], !DIExpression(), [[META59:![0-9]+]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META60:![0-9]+]], !DIExpression(), [[META59]])
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META61:![0-9]+]], !DIExpression(), [[META59]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META62:![0-9]+]], !DIExpression(), [[META59]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META63:![0-9]+]], !DIExpression(), [[META59]])
+// CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTCAPTURE_EXPR__ADDR]], [[META64:![0-9]+]], !DIExpression(), [[META59]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG65:![0-9]+]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG65]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG65]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR_ASCAST]], align 8, !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR_ASCAST]], align 1, !dbg [[DBG65]]
+// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP8]] to i1, !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG65]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG65]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP9]], i32 [[TMP5]], ptr [[TMP6]], ptr addrspace(1) [[TMP10]], i1 [[LOADEDV]]) #[[ATTR3:[0-9]+]], !dbg [[DBG65]]
+// CHECK1-NEXT: ret void, !dbg [[DBG65]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG74:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG66:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B4:%.*]] = alloca [10 x [10 x i32]], align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META81:![0-9]+]], !DIExpression(), [[META82:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META83:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META84:![0-9]+]], !DIExpression(), [[META85:![0-9]+]])
-// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META86:![0-9]+]], !DIExpression(), [[META87:![0-9]+]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META88:![0-9]+]], !DIExpression(), [[META89:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META90:![0-9]+]], !DIExpression(), [[META91:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG92:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG92]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG92]]
-// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META93:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META94:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG95:![0-9]+]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META96:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META97:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META98:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: #dbg_declare(ptr [[B4]], [[META99:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG92]]
-// CHECK1-NEXT: #dbg_declare(ptr [[I]], [[META100:![0-9]+]], !DIExpression(), [[META82]])
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG92]]
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG101:![0-9]+]]
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG92]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B4:%.*]] = alloca [10 x [10 x i32]], align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[B4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B4]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[F_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F]] to ptr
+// CHECK1-NEXT: [[G_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G]] to ptr
+// CHECK1-NEXT: [[H_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H]] to ptr
+// CHECK1-NEXT: [[D_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META73:![0-9]+]], !DIExpression(), [[META74:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META75:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META76:![0-9]+]], !DIExpression(), [[META77:![0-9]+]])
+// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META78:![0-9]+]], !DIExpression(), [[META79:![0-9]+]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META80:![0-9]+]], !DIExpression(), [[META81:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META82:![0-9]+]], !DIExpression(), [[META83:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG84:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG84]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG84]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG84]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG84]]
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG84]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG84]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG84]]
+// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG84]]
+// CHECK1-NEXT: store ptr [[TMP6]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG84]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG84]], !nonnull [[META30]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_IV]], [[META85:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_LB]], [[META86:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG87:![0-9]+]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_UB]], [[META88:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_STRIDE]], [[META89:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_IS_LAST]], [[META90:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B4]], [[META91:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4_ASCAST]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG84]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[I]], [[META92:![0-9]+]], !DIExpression(), [[META74]])
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !dbg [[DBG84]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG84]]
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1), !dbg [[DBG93:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG84]]
// CHECK1: omp.dispatch.cond:
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9, !dbg [[DBG95]]
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG95]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9, !dbg [[DBG87]]
+// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG87]]
// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG95]]
+// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG87]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG95]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG87]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG95]]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG92]]
-// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG92]]
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG87]]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG84]]
+// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG84]]
// CHECK1: omp.dispatch.body:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG92]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG84]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG92]]
-// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG92]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG84]]
+// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG84]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1, !dbg [[DBG102:![0-9]+]]
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG102]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG102]]
-// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META103:![0-9]+]], !DIExpression(), [[META106:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG107:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG107]]
-// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG107]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[META106]]
-// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META108:![0-9]+]], !DIExpression(), [[META109:![0-9]+]])
-// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[META109]]
-// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META110:![0-9]+]], !DIExpression(), [[META111:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 1, !dbg [[DBG112:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG112]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[META111]]
-// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META113:![0-9]+]], !DIExpression(), [[META114:![0-9]+]])
-// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META114]]
-// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG115:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG116:![0-9]+]]
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG117:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG116]]
-// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG116]]
-// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG118:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG119:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG119]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG120:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG119]]
-// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG119]]
-// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG121:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG122:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG122]]
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG123:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG122]]
-// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG122]]
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG122]]
-// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG124:![0-9]+]]
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG125:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG124]]
-// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG124]]
-// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG126:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG127:![0-9]+]]
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG128:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG127]]
-// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG127]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG127]]
-// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG129:![0-9]+]]
-// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP25]] to i1, !dbg [[DBG129]]
-// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG129]]
-// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP24]], !dbg [[DBG129]]
-// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG129]]
-// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG129]]
-// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[TMP7]], align 1, !dbg [[DBG129]]
-// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG130:![0-9]+]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1, !dbg [[DBG94:![0-9]+]]
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG94]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !dbg [[DBG94]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[F]], [[META95:![0-9]+]], !DIExpression(), [[META98:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG99:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG99]]
+// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG99]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F_ASCAST]], align 8, !dbg [[META98]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[G]], [[META100:![0-9]+]], !DIExpression(), [[META101:![0-9]+]])
+// CHECK1-NEXT: store ptr [[A_ADDR_ASCAST]], ptr [[G_ASCAST]], align 8, !dbg [[META101]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[H]], [[META102:![0-9]+]], !DIExpression(), [[META103:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4_ASCAST]], i64 0, i64 1, !dbg [[DBG104:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG104]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H_ASCAST]], align 8, !dbg [[META103]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[D]], [[META105:![0-9]+]], !DIExpression(), [[META106:![0-9]+]])
+// CHECK1-NEXT: store i32 15, ptr [[D_ASCAST]], align 4, !dbg [[META106]]
+// CHECK1-NEXT: store i32 5, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG107:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4_ASCAST]], i64 0, i64 0, !dbg [[DBG108:![0-9]+]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG109:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG108]]
+// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG108]]
+// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG110:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG111:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG111]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG112:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG111]]
+// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG111]]
+// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG113:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG114:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG114]]
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG115:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG114]]
+// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG114]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG114]]
+// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4_ASCAST]], i64 0, i64 0, !dbg [[DBG116:![0-9]+]]
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG117:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG116]]
+// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG116]]
+// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG118:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4_ASCAST]], i64 0, i64 0, !dbg [[DBG119:![0-9]+]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG120:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG119]]
+// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG119]]
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG119]]
+// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG121:![0-9]+]]
+// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP25]] to i1, !dbg [[DBG121]]
+// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG121]]
+// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP24]], !dbg [[DBG121]]
+// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG121]]
+// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG121]]
+// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[TMP7]], align 1, !dbg [[DBG121]]
+// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG122:![0-9]+]]
// CHECK1: omp.body.continue:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG101]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG93]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG92]]
-// CHECK1-NEXT: store i32 [[ADD27]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG92]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG101]], !llvm.loop [[LOOP131:![0-9]+]]
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG84]]
+// CHECK1-NEXT: store i32 [[ADD27]], ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG84]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG93]], !llvm.loop [[LOOP123:![0-9]+]]
// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG101]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG93]]
// CHECK1: omp.dispatch.inc:
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG92]]
-// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG92]]
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG95]]
-// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG92]]
-// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG92]]
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG101]], !llvm.loop [[LOOP133:![0-9]+]]
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG84]]
+// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG84]]
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG87]]
+// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG84]]
+// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG84]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG93]], !llvm.loop [[LOOP125:![0-9]+]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG132:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG134:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG124:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG126:![0-9]+]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG135:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG127:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META138:![0-9]+]], !DIExpression(), [[META139:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META140:![0-9]+]], !DIExpression(), [[META139]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META141:![0-9]+]], !DIExpression(), [[META139]])
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META142:![0-9]+]], !DIExpression(), [[META139]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META143:![0-9]+]], !DIExpression(), [[META139]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META144:![0-9]+]], !DIExpression(), [[META139]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG145:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG145]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG145]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR3]], !dbg [[DBG145]]
-// CHECK1-NEXT: ret void, !dbg [[DBG145]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META130:![0-9]+]], !DIExpression(), [[META131:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META132:![0-9]+]], !DIExpression(), [[META131]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META133:![0-9]+]], !DIExpression(), [[META131]])
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META134:![0-9]+]], !DIExpression(), [[META131]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META135:![0-9]+]], !DIExpression(), [[META131]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META136:![0-9]+]], !DIExpression(), [[META131]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG137:![0-9]+]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG137]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG137]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8, !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG137]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG137]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR3]], !dbg [[DBG137]]
+// CHECK1-NEXT: ret void, !dbg [[DBG137]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG146:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG138:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META151:![0-9]+]], !DIExpression(), [[META152:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META153:![0-9]+]], !DIExpression(), [[META154:![0-9]+]])
-// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META155:![0-9]+]], !DIExpression(), [[META156:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META157:![0-9]+]], !DIExpression(), [[META158:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META159:![0-9]+]], !DIExpression(), [[META160:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG161:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG161]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG161]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG161]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG161]]
-// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG161]]
-// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG161]]
-// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG161]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[A_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_CASTED]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META143:![0-9]+]], !DIExpression(), [[META144:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META145:![0-9]+]], !DIExpression(), [[META146:![0-9]+]])
+// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META147:![0-9]+]], !DIExpression(), [[META148:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META149:![0-9]+]], !DIExpression(), [[META150:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META151:![0-9]+]], !DIExpression(), [[META152:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG153:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG153]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG153]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG153]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG153]]
+// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG153]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG153]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG153]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG153]]
+// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG153]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG153]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG153]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG153]]
+// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG153]]
+// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG153]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG162:![0-9]+]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG164:![0-9]+]]
-// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG164]]
-// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG164]]
-// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG164]]
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG164]]
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG164]]
-// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG164]]
-// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG164]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG164]]
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG164]]
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG164]]
-// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG164]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB13]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG164]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG165:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG166:![0-9]+]]
+// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG154:![0-9]+]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG156:![0-9]+]]
+// CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED_ASCAST]], align 4, !dbg [[DBG156]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED_ASCAST]], align 8, !dbg [[DBG156]]
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0, !dbg [[DBG156]]
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP13]], align 8, !dbg [[DBG156]]
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1, !dbg [[DBG156]]
+// CHECK1-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP12]] to ptr, !dbg [[DBG156]]
+// CHECK1-NEXT: store ptr [[TMP15]], ptr [[TMP14]], align 8, !dbg [[DBG156]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2, !dbg [[DBG156]]
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP16]], align 8, !dbg [[DBG156]]
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3, !dbg [[DBG156]]
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG156]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB13]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4), !dbg [[DBG156]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG157:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG158:![0-9]+]]
// CHECK1: worker.exit:
-// CHECK1-NEXT: ret void, !dbg [[DBG161]]
+// CHECK1-NEXT: ret void, !dbg [[DBG153]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG167:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG159:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META170:![0-9]+]], !DIExpression(), [[META171:![0-9]+]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META172:![0-9]+]], !DIExpression(), [[META171]])
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META173:![0-9]+]], !DIExpression(), [[META171]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META174:![0-9]+]], !DIExpression(), [[META171]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META175:![0-9]+]], !DIExpression(), [[META171]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG176:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG176]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG176]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]]) #[[ATTR3]], !dbg [[DBG176]]
-// CHECK1-NEXT: ret void, !dbg [[DBG176]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META162:![0-9]+]], !DIExpression(), [[META163:![0-9]+]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META164:![0-9]+]], !DIExpression(), [[META163]])
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META165:![0-9]+]], !DIExpression(), [[META163]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META166:![0-9]+]], !DIExpression(), [[META163]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META167:![0-9]+]], !DIExpression(), [[META163]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG168:![0-9]+]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG168]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG168]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR_ASCAST]], align 8, !dbg [[DBG168]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG168]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG168]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG168]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG168]]
+// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG168]]
+// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG168]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG168]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]]) #[[ATTR3]], !dbg [[DBG168]]
+// CHECK1-NEXT: ret void, !dbg [[DBG168]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG177:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG169:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META180:![0-9]+]], !DIExpression(), [[META181:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META182:![0-9]+]], !DIExpression(), [[META181]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META183:![0-9]+]], !DIExpression(), [[META184:![0-9]+]])
-// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META185:![0-9]+]], !DIExpression(), [[META186:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META187:![0-9]+]], !DIExpression(), [[META188:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META189:![0-9]+]], !DIExpression(), [[META190:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG191:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG191]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG191]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG191]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META192:![0-9]+]], !DIExpression(), [[META181]])
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META193:![0-9]+]], !DIExpression(), [[META181]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG194:![0-9]+]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META195:![0-9]+]], !DIExpression(), [[META181]])
-// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META196:![0-9]+]], !DIExpression(), [[META181]])
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META197:![0-9]+]], !DIExpression(), [[META181]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: #dbg_declare(ptr [[I]], [[META198:![0-9]+]], !DIExpression(), [[META181]])
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG191]]
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB10:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG199:![0-9]+]]
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG191]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP3:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[F_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F]] to ptr
+// CHECK1-NEXT: [[G_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G]] to ptr
+// CHECK1-NEXT: [[H_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H]] to ptr
+// CHECK1-NEXT: [[D_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META172:![0-9]+]], !DIExpression(), [[META173:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META174:![0-9]+]], !DIExpression(), [[META173]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META175:![0-9]+]], !DIExpression(), [[META176:![0-9]+]])
+// CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR_ASCAST]], align 4
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META177:![0-9]+]], !DIExpression(), [[META178:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META179:![0-9]+]], !DIExpression(), [[META180:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META181:![0-9]+]], !DIExpression(), [[META182:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG183:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG183]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG183]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG183]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG183]]
+// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG183]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG183]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG183]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG183]]
+// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG183]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG183]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG183]], !nonnull [[META30]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_IV]], [[META184:![0-9]+]], !DIExpression(), [[META173]])
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_LB]], [[META185:![0-9]+]], !DIExpression(), [[META173]])
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG186:![0-9]+]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_UB]], [[META187:![0-9]+]], !DIExpression(), [[META173]])
+// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_STRIDE]], [[META188:![0-9]+]], !DIExpression(), [[META173]])
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_IS_LAST]], [[META189:![0-9]+]], !DIExpression(), [[META173]])
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[I]], [[META190:![0-9]+]], !DIExpression(), [[META173]])
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !dbg [[DBG183]]
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG183]]
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB10:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1), !dbg [[DBG191:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG183]]
// CHECK1: omp.dispatch.cond:
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG194]]
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG194]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG186]]
+// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG186]]
// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG194]]
+// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG186]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG194]]
+// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG186]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG194]]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG191]]
-// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG191]]
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG186]]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG183]]
+// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG183]]
// CHECK1: omp.dispatch.body:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG191]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG183]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG191]]
-// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG191]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG183]]
+// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG183]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG200:![0-9]+]]
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG200]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG200]]
-// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META201:![0-9]+]], !DIExpression(), [[META203:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG204:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG204]]
-// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG204]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[F]], align 8, !dbg [[META203]]
-// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META205:![0-9]+]], !DIExpression(), [[META206:![0-9]+]])
-// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[META206]]
-// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META207:![0-9]+]], !DIExpression(), [[META208:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG209:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 1, !dbg [[DBG209]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[H]], align 8, !dbg [[META208]]
-// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META210:![0-9]+]], !DIExpression(), [[META211:![0-9]+]])
-// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META211]]
-// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG212:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG213:![0-9]+]]
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG214:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG213]]
-// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM]], !dbg [[DBG213]]
-// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX11]], align 4, !dbg [[DBG215:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG216:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX12]], i64 0, i64 0, !dbg [[DBG216]]
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG217:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG216]]
-// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX13]], i64 0, i64 [[IDXPROM14]], !dbg [[DBG216]]
-// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX15]], align 4, !dbg [[DBG218:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG219:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX16]], i64 0, i64 0, !dbg [[DBG219]]
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG220:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG219]]
-// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG219]]
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !dbg [[DBG219]]
-// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG221:![0-9]+]]
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG222:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG221]]
-// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG221]]
-// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX22]], align 4, !dbg [[DBG223:![0-9]+]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG224:![0-9]+]]
-// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG224]]
-// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG224]]
-// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG225:![0-9]+]]
-// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG226:![0-9]+]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG192:![0-9]+]]
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG192]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !dbg [[DBG192]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[F]], [[META193:![0-9]+]], !DIExpression(), [[META195:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG196:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG196]]
+// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG196]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[F_ASCAST]], align 8, !dbg [[META195]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[G]], [[META197:![0-9]+]], !DIExpression(), [[META198:![0-9]+]])
+// CHECK1-NEXT: store ptr [[A_ADDR_ASCAST]], ptr [[G_ASCAST]], align 8, !dbg [[META198]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[H]], [[META199:![0-9]+]], !DIExpression(), [[META200:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG201:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 1, !dbg [[DBG201]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[H_ASCAST]], align 8, !dbg [[META200]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[D]], [[META202:![0-9]+]], !DIExpression(), [[META203:![0-9]+]])
+// CHECK1-NEXT: store i32 15, ptr [[D_ASCAST]], align 4, !dbg [[META203]]
+// CHECK1-NEXT: store i32 5, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG204:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG205:![0-9]+]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG206:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG205]]
+// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM]], !dbg [[DBG205]]
+// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX11]], align 4, !dbg [[DBG207:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG208:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX12]], i64 0, i64 0, !dbg [[DBG208]]
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG209:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG208]]
+// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX13]], i64 0, i64 [[IDXPROM14]], !dbg [[DBG208]]
+// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX15]], align 4, !dbg [[DBG210:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG211:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX16]], i64 0, i64 0, !dbg [[DBG211]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG212:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG211]]
+// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG211]]
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !dbg [[DBG211]]
+// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG213:![0-9]+]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG214:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG213]]
+// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG213]]
+// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX22]], align 4, !dbg [[DBG215:![0-9]+]]
+// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG216:![0-9]+]]
+// CHECK1-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG216]]
+// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[LOADEDV]] to i32, !dbg [[DBG216]]
+// CHECK1-NEXT: store i32 [[CONV]], ptr [[D_ASCAST]], align 4, !dbg [[DBG217:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG218:![0-9]+]]
// CHECK1: omp.body.continue:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG199]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG191]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG191]]
-// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG191]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG199]], !llvm.loop [[LOOP227:![0-9]+]]
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG183]]
+// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG183]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG191]], !llvm.loop [[LOOP219:![0-9]+]]
// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG199]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG191]]
// CHECK1: omp.dispatch.inc:
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG191]]
-// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG191]]
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG194]]
-// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG191]]
-// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG191]]
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG199]], !llvm.loop [[LOOP229:![0-9]+]]
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG183]]
+// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG183]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG186]]
+// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG183]]
+// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG183]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG191]], !llvm.loop [[LOOP221:![0-9]+]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG228:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG230:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG220:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG222:![0-9]+]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG231:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG223:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META232:![0-9]+]], !DIExpression(), [[META233:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META234:![0-9]+]], !DIExpression(), [[META233]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META235:![0-9]+]], !DIExpression(), [[META233]])
-// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META236:![0-9]+]], !DIExpression(), [[META233]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META237:![0-9]+]], !DIExpression(), [[META233]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META238:![0-9]+]], !DIExpression(), [[META233]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG239:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG239]]
-// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG239]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG239]]
-// CHECK1-NEXT: ret void, !dbg [[DBG239]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META224:![0-9]+]], !DIExpression(), [[META225:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META226:![0-9]+]], !DIExpression(), [[META225]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META227:![0-9]+]], !DIExpression(), [[META225]])
+// CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META228:![0-9]+]], !DIExpression(), [[META225]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META229:![0-9]+]], !DIExpression(), [[META225]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META230:![0-9]+]], !DIExpression(), [[META225]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG231:![0-9]+]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG231]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG231]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8, !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4, !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG231]]
+// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG231]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG231]]
+// CHECK1-NEXT: ret void, !dbg [[DBG231]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG240:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG232:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META245:![0-9]+]], !DIExpression(), [[META246:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META247:![0-9]+]], !DIExpression(), [[META248:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META249:![0-9]+]], !DIExpression(), [[META250:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META251:![0-9]+]], !DIExpression(), [[META252:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META253:![0-9]+]], !DIExpression(), [[META254:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG255:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG255]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG255]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG255]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG255]]
-// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG255]]
-// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG255]]
-// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG255]]
-// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG255]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META237:![0-9]+]], !DIExpression(), [[META238:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META239:![0-9]+]], !DIExpression(), [[META240:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META241:![0-9]+]], !DIExpression(), [[META242:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META243:![0-9]+]], !DIExpression(), [[META244:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META245:![0-9]+]], !DIExpression(), [[META246:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG247:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG247]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG247]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG247]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG247]]
+// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG247]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG247]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG247]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG247]]
+// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG247]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG247]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG247]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG247]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG247]]
+// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP3_ASCAST]], align 8, !dbg [[DBG247]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP3_ASCAST]], align 8, !dbg [[DBG247]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG247]]
+// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG247]]
+// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG247]]
// CHECK1: user_code.entry:
-// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG256:![0-9]+]]
-// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG258:![0-9]+]]
-// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG258]]
-// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG258]]
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG258]]
-// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG258]]
-// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG258]]
-// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG258]]
-// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG258]]
-// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB20]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG258]]
-// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG259:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG260:![0-9]+]]
+// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG248:![0-9]+]]
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0, !dbg [[DBG250:![0-9]+]]
+// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG250]]
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1, !dbg [[DBG250]]
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG250]]
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2, !dbg [[DBG250]]
+// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG250]]
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3, !dbg [[DBG250]]
+// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG250]]
+// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB20]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4), !dbg [[DBG250]]
+// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG251:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG252:![0-9]+]]
// CHECK1: worker.exit:
-// CHECK1-NEXT: ret void, !dbg [[DBG255]]
+// CHECK1-NEXT: ret void, !dbg [[DBG247]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG261:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG253:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META264:![0-9]+]], !DIExpression(), [[META265:![0-9]+]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META266:![0-9]+]], !DIExpression(), [[META265]])
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META267:![0-9]+]], !DIExpression(), [[META265]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META268:![0-9]+]], !DIExpression(), [[META265]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META269:![0-9]+]], !DIExpression(), [[META265]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG270:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG270]]
-// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG270]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__(ptr [[TMP4]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]]) #[[ATTR3]], !dbg [[DBG270]]
-// CHECK1-NEXT: ret void, !dbg [[DBG270]]
+// CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DYN_PTR_ADDR]], [[META256:![0-9]+]], !DIExpression(), [[META257:![0-9]+]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META258:![0-9]+]], !DIExpression(), [[META257]])
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META259:![0-9]+]], !DIExpression(), [[META257]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META260:![0-9]+]], !DIExpression(), [[META257]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META261:![0-9]+]], !DIExpression(), [[META257]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG262:![0-9]+]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG262]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG262]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG262]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR_ASCAST]], align 8, !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG262]]
+// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG262]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__(ptr [[TMP4]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]]) #[[ATTR3]], !dbg [[DBG262]]
+// CHECK1-NEXT: ret void, !dbg [[DBG262]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG271:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG263:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8
-// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META274:![0-9]+]], !DIExpression(), [[META275:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META276:![0-9]+]], !DIExpression(), [[META275]])
-// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META277:![0-9]+]], !DIExpression(), [[META278:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META279:![0-9]+]], !DIExpression(), [[META280:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META281:![0-9]+]], !DIExpression(), [[META282:![0-9]+]])
-// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META283:![0-9]+]], !DIExpression(), [[META284:![0-9]+]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG285:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG285]]
-// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG285]]
-// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG285]]
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG285]]
-// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META286:![0-9]+]], !DIExpression(), [[META275]])
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META287:![0-9]+]], !DIExpression(), [[META275]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG288:![0-9]+]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META289:![0-9]+]], !DIExpression(), [[META275]])
-// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META290:![0-9]+]], !DIExpression(), [[META275]])
-// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META291:![0-9]+]], !DIExpression(), [[META275]])
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: #dbg_declare(ptr [[I]], [[META292:![0-9]+]], !DIExpression(), [[META275]])
-// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG285]]
-// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB17:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG293:![0-9]+]]
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG285]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK1-NEXT: [[TMP:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP2:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[_TMP4:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[F:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[G:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
+// CHECK1-NEXT: [[TMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP1]] to ptr
+// CHECK1-NEXT: [[TMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP2]] to ptr
+// CHECK1-NEXT: [[TMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP3]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
+// CHECK1-NEXT: [[TMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[_TMP4]] to ptr
+// CHECK1-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr
+// CHECK1-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr
+// CHECK1-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr
+// CHECK1-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
+// CHECK1-NEXT: [[F_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F]] to ptr
+// CHECK1-NEXT: [[G_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G]] to ptr
+// CHECK1-NEXT: [[H_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H]] to ptr
+// CHECK1-NEXT: [[D_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META266:![0-9]+]], !DIExpression(), [[META267:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META268:![0-9]+]], !DIExpression(), [[META267]])
+// CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META269:![0-9]+]], !DIExpression(), [[META270:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META271:![0-9]+]], !DIExpression(), [[META272:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META273:![0-9]+]], !DIExpression(), [[META274:![0-9]+]])
+// CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META275:![0-9]+]], !DIExpression(), [[META276:![0-9]+]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG277:![0-9]+]]
+// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG277]]
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP_ASCAST]], align 8, !dbg [[DBG277]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG277]]
+// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP1_ASCAST]], align 8, !dbg [[DBG277]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG277]]
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP2_ASCAST]], align 8, !dbg [[DBG277]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG277]]
+// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP3_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP3_ASCAST]], align 8, !dbg [[DBG277]], !nonnull [[META30]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_IV]], [[META278:![0-9]+]], !DIExpression(), [[META267]])
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_LB]], [[META279:![0-9]+]], !DIExpression(), [[META267]])
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG280:![0-9]+]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_UB]], [[META281:![0-9]+]], !DIExpression(), [[META267]])
+// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_STRIDE]], [[META282:![0-9]+]], !DIExpression(), [[META267]])
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTOMP_IS_LAST]], [[META283:![0-9]+]], !DIExpression(), [[META267]])
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[I]], [[META284:![0-9]+]], !DIExpression(), [[META267]])
+// CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG277]]
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB17:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1), !dbg [[DBG285:![0-9]+]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG277]]
// CHECK1: omp.dispatch.cond:
-// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9, !dbg [[DBG288]]
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG288]]
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP14]], 9, !dbg [[DBG280]]
+// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG280]]
// CHECK1: cond.true:
-// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG288]]
+// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG280]]
// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG288]]
+// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG280]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ], !dbg [[DBG288]]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]], !dbg [[DBG285]]
-// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG285]]
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ], !dbg [[DBG280]]
+// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]], !dbg [[DBG277]]
+// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG277]]
// CHECK1: omp.dispatch.body:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG285]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG277]]
// CHECK1: omp.inner.for.cond:
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]], !dbg [[DBG285]]
-// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG285]]
+// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]], !dbg [[DBG277]]
+// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG277]]
// CHECK1: omp.inner.for.body:
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1, !dbg [[DBG294:![0-9]+]]
-// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG294]]
-// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG294]]
-// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META295:![0-9]+]], !DIExpression(), [[META297:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG298:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG298]]
-// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG298]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[META297]]
-// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META299:![0-9]+]], !DIExpression(), [[META300:![0-9]+]])
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[META300]]
-// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META301:![0-9]+]], !DIExpression(), [[META302:![0-9]+]])
-// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 1, !dbg [[DBG303:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG303]]
-// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[META302]]
-// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META304:![0-9]+]], !DIExpression(), [[META305:![0-9]+]])
-// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META305]]
-// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg [[DBG306:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG307:![0-9]+]]
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG308:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG307]]
-// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG307]]
-// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG309:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG310:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG310]]
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG311:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG310]]
-// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG310]]
-// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG312:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG313:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG313]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG314:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG313]]
-// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG313]]
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG313]]
-// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG315:![0-9]+]]
-// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG316:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP26]] to i64, !dbg [[DBG315]]
-// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG315]]
-// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG317:![0-9]+]]
-// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG318:![0-9]+]]
-// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG319:![0-9]+]]
-// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG318]]
-// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG318]]
-// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG318]]
-// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0, !dbg [[DBG318]]
-// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG320:![0-9]+]]
-// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[TMP11]], align 1, !dbg [[DBG320]]
-// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG321:![0-9]+]]
+// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1, !dbg [[DBG286:![0-9]+]]
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG286]]
+// CHECK1-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !dbg [[DBG286]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[F]], [[META287:![0-9]+]], !DIExpression(), [[META289:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG290:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG290]]
+// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG290]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F_ASCAST]], align 8, !dbg [[META289]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[G]], [[META291:![0-9]+]], !DIExpression(), [[META292:![0-9]+]])
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G_ASCAST]], align 8, !dbg [[META292]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[H]], [[META293:![0-9]+]], !DIExpression(), [[META294:![0-9]+]])
+// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 1, !dbg [[DBG295:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG295]]
+// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H_ASCAST]], align 8, !dbg [[META294]]
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[D]], [[META296:![0-9]+]], !DIExpression(), [[META297:![0-9]+]])
+// CHECK1-NEXT: store i32 15, ptr [[D_ASCAST]], align 4, !dbg [[META297]]
+// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg [[DBG298:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG299:![0-9]+]]
+// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG300:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG299]]
+// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG299]]
+// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG301:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG302:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG302]]
+// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG303:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG302]]
+// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG302]]
+// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG304:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG305:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG305]]
+// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG306:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP24]] to i64, !dbg [[DBG305]]
+// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG305]]
+// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG305]]
+// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG307:![0-9]+]]
+// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG308:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP26]] to i64, !dbg [[DBG307]]
+// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG307]]
+// CHECK1-NEXT: store i32 [[TMP25]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG309:![0-9]+]]
+// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG310:![0-9]+]]
+// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG311:![0-9]+]]
+// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP27]] to i64, !dbg [[DBG310]]
+// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG310]]
+// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG310]]
+// CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP28]], 0, !dbg [[DBG310]]
+// CHECK1-NEXT: [[STOREDV:%.*]] = zext i1 [[TOBOOL]] to i8, !dbg [[DBG312:![0-9]+]]
+// CHECK1-NEXT: store i8 [[STOREDV]], ptr [[TMP11]], align 1, !dbg [[DBG312]]
+// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG313:![0-9]+]]
// CHECK1: omp.body.continue:
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG293]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG285]]
// CHECK1: omp.inner.for.inc:
-// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP29]], 1, !dbg [[DBG285]]
-// CHECK1-NEXT: store i32 [[ADD27]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG285]]
-// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG293]], !llvm.loop [[LOOP322:![0-9]+]]
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP29]], 1, !dbg [[DBG277]]
+// CHECK1-NEXT: store i32 [[ADD27]], ptr [[DOTOMP_IV_ASCAST]], align 4, !dbg [[DBG277]]
+// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG285]], !llvm.loop [[LOOP314:![0-9]+]]
// CHECK1: omp.inner.for.end:
-// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG293]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG285]]
// CHECK1: omp.dispatch.inc:
-// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]], !dbg [[DBG285]]
-// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG285]]
-// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG288]]
-// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP32]], [[TMP33]], !dbg [[DBG285]]
-// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG285]]
-// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG293]], !llvm.loop [[LOOP324:![0-9]+]]
+// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP30]], [[TMP31]], !dbg [[DBG277]]
+// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_LB_ASCAST]], align 4, !dbg [[DBG277]]
+// CHECK1-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4, !dbg [[DBG280]]
+// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP32]], [[TMP33]], !dbg [[DBG277]]
+// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_UB_ASCAST]], align 4, !dbg [[DBG277]]
+// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG285]], !llvm.loop [[LOOP316:![0-9]+]]
// CHECK1: omp.dispatch.end:
-// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB19:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG323:![0-9]+]]
-// CHECK1-NEXT: ret void, !dbg [[DBG325:![0-9]+]]
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB19:[0-9]+]], i32 [[TMP13]]), !dbg [[DBG315:![0-9]+]]
+// CHECK1-NEXT: ret void, !dbg [[DBG317:![0-9]+]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG326:![0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG318:![0-9]+]] {
// CHECK1-NEXT: entry:
-// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META329:![0-9]+]], !DIExpression(), [[META330:![0-9]+]])
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META331:![0-9]+]], !DIExpression(), [[META330]])
-// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META332:![0-9]+]], !DIExpression(), [[META330]])
-// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META333:![0-9]+]], !DIExpression(), [[META330]])
-// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META334:![0-9]+]], !DIExpression(), [[META330]])
-// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8
-// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META335:![0-9]+]], !DIExpression(), [[META330]])
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG336:![0-9]+]]
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG336]]
-// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG336]]
-// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG336]]
-// CHECK1-NEXT: ret void, !dbg [[DBG336]]
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr
+// CHECK1-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
+// CHECK1-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK1-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
+// CHECK1-NEXT: [[BB_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BB_ADDR]] to ptr
+// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]], [[META321:![0-9]+]], !DIExpression(), [[META322:![0-9]+]])
+// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[DOTBOUND_TID__ADDR]], [[META323:![0-9]+]], !DIExpression(), [[META322]])
+// CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[C_ADDR]], [[META324:![0-9]+]], !DIExpression(), [[META322]])
+// CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[A_ADDR]], [[META325:![0-9]+]], !DIExpression(), [[META322]])
+// CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[B_ADDR]], [[META326:![0-9]+]], !DIExpression(), [[META322]])
+// CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR_ASCAST]], align 8
+// CHECK1-NEXT: #dbg_declare(ptr addrspace(5) [[BB_ADDR]], [[META327:![0-9]+]], !DIExpression(), [[META322]])
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG328:![0-9]+]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG328]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG328]], !nonnull [[META30]], !align [[META44]]
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG328]], !nonnull [[META30]]
+// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8, !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8, !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8, !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8, !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8, !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[BB_ADDR_ASCAST]], align 8, !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG328]]
+// CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG328]]
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG328]]
+// CHECK1-NEXT: ret void, !dbg [[DBG328]]
//
More information about the llvm-commits
mailing list