[clang] b6f85d8 - [CodeGen][OpenMP] Use correct type in EmitLoadOfPointer()
Nikita Popov via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 21 04:08:13 PDT 2022
Author: Nikita Popov
Date: 2022-03-21T12:08:05+01:00
New Revision: b6f85d8539455d82870bc275ef0de9a7973ccb17
URL: https://github.com/llvm/llvm-project/commit/b6f85d8539455d82870bc275ef0de9a7973ccb17
DIFF: https://github.com/llvm/llvm-project/commit/b6f85d8539455d82870bc275ef0de9a7973ccb17.diff
LOG: [CodeGen][OpenMP] Use correct type in EmitLoadOfPointer()
Rather than using a dummy void pointer type, we should specify the
correct private type and perform the bitcast before the load instead
of after it. This way, the Address will have correct alignment
information.
Added:
Modified:
clang/lib/CodeGen/CGOpenMPRuntime.cpp
clang/lib/CodeGen/CGOpenMPRuntime.h
clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
clang/test/OpenMP/for_reduction_task_codegen.cpp
clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
clang/test/OpenMP/parallel_reduction_task_codegen.cpp
clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
clang/test/OpenMP/reduction_implicit_map.cpp
clang/test/OpenMP/sections_reduction_task_codegen.cpp
clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
clang/test/OpenMP/task_in_reduction_codegen.cpp
clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 840775308684b..f1439f20f50cc 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -824,9 +824,7 @@ void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
- const auto *PrivateVD =
- cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
- QualType PrivateType = PrivateVD->getType();
+ QualType PrivateType = getPrivateType(N);
bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
if (!PrivateType->isVariablyModifiedType()) {
Sizes.emplace_back(
@@ -861,9 +859,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
llvm::Value *Size) {
- const auto *PrivateVD =
- cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
- QualType PrivateType = PrivateVD->getType();
+ QualType PrivateType = getPrivateType(N);
if (!PrivateType->isVariablyModifiedType()) {
assert(!Size && !Sizes[N].second &&
"Size should be nullptr for non-variably modified reduction "
@@ -886,9 +882,6 @@ void ReductionCodeGen::emitInitialization(
cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
const OMPDeclareReductionDecl *DRD =
getReductionInit(ClausesData[N].ReductionOp);
- QualType PrivateType = PrivateVD->getType();
- PrivateAddr = CGF.Builder.CreateElementBitCast(
- PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
if (DRD && DRD->getInitializer())
(void)DefaultInit(CGF);
@@ -907,18 +900,14 @@ void ReductionCodeGen::emitInitialization(
}
bool ReductionCodeGen::needCleanups(unsigned N) {
- const auto *PrivateVD =
- cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
- QualType PrivateType = PrivateVD->getType();
+ QualType PrivateType = getPrivateType(N);
QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
return DTorKind != QualType::DK_none;
}
void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
Address PrivateAddr) {
- const auto *PrivateVD =
- cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
- QualType PrivateType = PrivateVD->getType();
+ QualType PrivateType = getPrivateType(N);
QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
if (needCleanups(N)) {
PrivateAddr = CGF.Builder.CreateElementBitCast(
@@ -5906,9 +5895,12 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
Fn->setDoesNotRecurse();
CodeGenFunction CGF(CGM);
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
+ QualType PrivateType = RCG.getPrivateType(N);
Address PrivateAddr = CGF.EmitLoadOfPointer(
- CGF.GetAddrOfLocalVar(&Param),
- C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
+ CGF.Builder.CreateElementBitCast(
+ CGF.GetAddrOfLocalVar(&Param),
+ CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
+ C.getPointerType(PrivateType)->castAs<PointerType>());
llvm::Value *Size = nullptr;
// If the size of the reduction item is non-constant, load it from global
// threadprivate variable.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 19754b0cfacc9..626e86a1ddc91 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -218,6 +218,11 @@ class ReductionCodeGen {
/// Returns true if the initialization of the reduction item uses initializer
/// from declare reduction construct.
bool usesReductionInitializer(unsigned N) const;
+ /// Return the type of the private item.
+ QualType getPrivateType(unsigned N) const {
+ return cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl())
+ ->getType();
+ }
};
class CGOpenMPRuntime {
diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
index b2fbf617069ed..70425f967913b 100644
--- a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
@@ -431,9 +431,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -1050,9 +1050,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/for_reduction_task_codegen.cpp b/clang/test/OpenMP/for_reduction_task_codegen.cpp
index 51c47d6e42d81..017ea0b73e107 100644
--- a/clang/test/OpenMP/for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/for_reduction_task_codegen.cpp
@@ -340,9 +340,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -868,9 +868,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
index 1dfc66fb95839..546f82b300577 100644
--- a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
@@ -211,9 +211,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -242,9 +242,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -273,9 +273,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -304,8 +304,8 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
@@ -413,16 +413,16 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
@@ -888,9 +888,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -919,9 +919,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -950,9 +950,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -981,8 +981,8 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK2-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
@@ -1090,16 +1090,16 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK2: omp.arrayinit.body:
-// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK2-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
diff --git a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
index 9c23a42584b05..c408337c693e3 100644
--- a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
@@ -211,9 +211,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -242,9 +242,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -273,9 +273,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -304,8 +304,8 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
@@ -413,16 +413,16 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
@@ -888,9 +888,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -919,9 +919,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -950,9 +950,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -981,8 +981,8 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK2-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
@@ -1090,16 +1090,16 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK2: omp.arrayinit.body:
-// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK2-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
diff --git a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
index 173247d360826..b51b6cec2fedb 100644
--- a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
@@ -335,9 +335,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -859,9 +859,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
index a39afae3156fd..31f3bbbf58e74 100644
--- a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
@@ -290,9 +290,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -769,9 +769,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
index df79f086ba1e0..6057faa382227 100644
--- a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
@@ -281,9 +281,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -751,9 +751,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
index b620508a79286..167fe633fbad8 100644
--- a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
@@ -323,9 +323,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -835,9 +835,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp
index f6ed27e73ff70..b734703ac07de 100644
--- a/clang/test/OpenMP/reduction_implicit_map.cpp
+++ b/clang/test/OpenMP/reduction_implicit_map.cpp
@@ -711,14 +711,14 @@ int main()
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP2]] to double*
-// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr double, double* [[TMP4]], i64 [[TMP3]]
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq double* [[TMP4]], [[TMP5]]
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to double**
+// CHECK1-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr double, double* [[TMP3]], i64 [[TMP4]]
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq double* [[TMP3]], [[TMP5]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store double 0.000000e+00, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]]
diff --git a/clang/test/OpenMP/sections_reduction_task_codegen.cpp b/clang/test/OpenMP/sections_reduction_task_codegen.cpp
index f0617e2de96c5..1aeea82bb3292 100644
--- a/clang/test/OpenMP/sections_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/sections_reduction_task_codegen.cpp
@@ -328,9 +328,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -844,9 +844,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
index fdb77b47a27c4..a4e0aecf724e5 100644
--- a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
@@ -348,9 +348,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -885,9 +885,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
index ecfde122259f9..bd8a017a63013 100644
--- a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
@@ -294,9 +294,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -777,9 +777,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
index 2f09139cfbc98..4b240a1462ec4 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -321,9 +321,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -694,9 +694,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -1253,9 +1253,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -1626,9 +1626,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/task_in_reduction_codegen.cpp b/clang/test/OpenMP/task_in_reduction_codegen.cpp
index 686e11b9b9c96..b1cb354c4e6ff 100644
--- a/clang/test/OpenMP/task_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/task_in_reduction_codegen.cpp
@@ -235,9 +235,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -266,9 +266,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -297,9 +297,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -328,8 +328,8 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
@@ -437,16 +437,16 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
@@ -906,9 +906,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -937,9 +937,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -968,9 +968,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -999,8 +999,8 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK2-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
@@ -1108,16 +1108,16 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK2: omp.arrayinit.body:
-// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK2-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
diff --git a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
index fa2f8e1ae9a6d..3633e48aadcde 100644
--- a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
@@ -211,9 +211,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -242,9 +242,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -273,9 +273,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -304,8 +304,8 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
@@ -413,16 +413,16 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
@@ -881,9 +881,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -912,9 +912,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -943,9 +943,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -974,8 +974,8 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK2-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
@@ -1083,16 +1083,16 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK2: omp.arrayinit.body:
-// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK2-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
diff --git a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
index 9a8daed3a72c7..4242713529624 100644
--- a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
@@ -211,9 +211,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -242,9 +242,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -273,9 +273,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -304,8 +304,8 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
@@ -413,16 +413,16 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK1: omp.arrayinit.body:
-// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK1-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
@@ -881,9 +881,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -912,9 +912,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT: store float 0.000000e+00, float* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -943,9 +943,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -974,8 +974,8 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to [5 x %struct.S]*
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to [5 x %struct.S]**
+// CHECK2-NEXT: [[TMP3:%.*]] = load [5 x %struct.S]*, [5 x %struct.S]** [[TMP2]], align 8
// CHECK2-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [5 x %struct.S], [5 x %struct.S]* [[TMP3]], i32 0, i32 0
// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 5
// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP4]]
@@ -1083,16 +1083,16 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP3]] to i16*
-// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP6]]
-// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP8]]
+// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT: [[TMP4:%.*]] = load i16*, i16** [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
+// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i64*
+// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr i16, i16* [[TMP4]], i64 [[TMP7]]
+// CHECK2-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i16* [[TMP4]], [[TMP8]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]]
// CHECK2: omp.arrayinit.body:
-// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
+// CHECK2-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i16* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ]
// CHECK2-NEXT: store i16 0, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 2
// CHECK2-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i16, i16* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
// CHECK2-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i16* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]]
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
index b5badb9acd863..abc3493054849 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -327,9 +327,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -700,9 +700,9 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -1264,9 +1264,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -1637,9 +1637,9 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: store i32 0, i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
More information about the cfe-commits mailing list