[clang] 52cc65d - [OpenMPRuntime] Specify correct pointer type
Nikita Popov via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 18 06:26:06 PDT 2022
Author: Nikita Popov
Date: 2022-03-18T14:25:51+01:00
New Revision: 52cc65d4740992a29ae8375498c3b470f003de26
URL: https://github.com/llvm/llvm-project/commit/52cc65d4740992a29ae8375498c3b470f003de26
DIFF: https://github.com/llvm/llvm-project/commit/52cc65d4740992a29ae8375498c3b470f003de26.diff
LOG: [OpenMPRuntime] Specify correct pointer type
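Rather than specifying a dummy type in EmitLoadOfPointer() and
then casting the loaded pointer to the correct type, we should instead
cast the address beforehand and specify the correct pointer type for
the load. Otherwise the computed alignment will be incorrect, because
it is computed for the dummy type rather than for the variable's actual
type (in the updated tests, the i32 and float accesses change from
align 8 to align 4).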
Added:
Modified:
clang/lib/CodeGen/CGOpenMPRuntime.cpp
clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
clang/test/OpenMP/for_reduction_task_codegen.cpp
clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
clang/test/OpenMP/parallel_reduction_task_codegen.cpp
clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
clang/test/OpenMP/reduction_implicit_map.cpp
clang/test/OpenMP/sections_reduction_task_codegen.cpp
clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
clang/test/OpenMP/task_in_reduction_codegen.cpp
clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 1192d194f5865..840775308684b 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -5993,19 +5993,19 @@ static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
PrivateScope.addPrivate(
LHSVD,
// Pull out the pointer to the variable.
- CGF.Builder.CreateElementBitCast(
- CGF.EmitLoadOfPointer(
+ CGF.EmitLoadOfPointer(
+ CGF.Builder.CreateElementBitCast(
CGF.GetAddrOfLocalVar(&ParamInOut),
- C.getPointerType(C.VoidPtrTy).castAs<PointerType>()),
- CGF.ConvertTypeForMem(LHSVD->getType())));
+ CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
+ C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
PrivateScope.addPrivate(
RHSVD,
// Pull out the pointer to the variable.
- CGF.Builder.CreateElementBitCast(
- CGF.EmitLoadOfPointer(
- CGF.GetAddrOfLocalVar(&ParamIn),
- C.getPointerType(C.VoidPtrTy).castAs<PointerType>()),
- CGF.ConvertTypeForMem(RHSVD->getType())));
+ CGF.EmitLoadOfPointer(
+ CGF.Builder.CreateElementBitCast(
+ CGF.GetAddrOfLocalVar(&ParamIn),
+ CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
+ C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
PrivateScope.Privatize();
// Emit the combiner body:
// %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
index 5b00d194d1be1..b2fbf617069ed 100644
--- a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
@@ -444,14 +444,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -1063,14 +1063,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/for_reduction_task_codegen.cpp b/clang/test/OpenMP/for_reduction_task_codegen.cpp
index 7fc4b8fedeef4..51c47d6e42d81 100644
--- a/clang/test/OpenMP/for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/for_reduction_task_codegen.cpp
@@ -353,14 +353,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -881,14 +881,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
index cb67c0b4b23b2..1dfc66fb95839 100644
--- a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
@@ -224,14 +224,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -255,14 +255,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK1-NEXT: [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store float [[ADD]], float* [[TMP3]], align 8
+// CHECK1-NEXT: store float [[ADD]], float* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -286,14 +286,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -358,10 +358,10 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -442,10 +442,10 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK1-NEXT: [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -901,14 +901,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -932,14 +932,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK2-NEXT: [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store float [[ADD]], float* [[TMP3]], align 8
+// CHECK2-NEXT: store float [[ADD]], float* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -963,14 +963,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -1035,10 +1035,10 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK2-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -1119,10 +1119,10 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK2-NEXT: [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT: [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK2-NEXT: [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
diff --git a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
index 590daa75f3702..9c23a42584b05 100644
--- a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
@@ -224,14 +224,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -255,14 +255,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK1-NEXT: [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store float [[ADD]], float* [[TMP3]], align 8
+// CHECK1-NEXT: store float [[ADD]], float* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -286,14 +286,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -358,10 +358,10 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -442,10 +442,10 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK1-NEXT: [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -901,14 +901,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -932,14 +932,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK2-NEXT: [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store float [[ADD]], float* [[TMP3]], align 8
+// CHECK2-NEXT: store float [[ADD]], float* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -963,14 +963,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -1035,10 +1035,10 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK2-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -1119,10 +1119,10 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK2-NEXT: [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT: [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK2-NEXT: [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
diff --git a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
index 0b78b407dd075..173247d360826 100644
--- a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
@@ -348,14 +348,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -872,14 +872,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
index df366df3cdec5..a39afae3156fd 100644
--- a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
@@ -303,14 +303,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -782,14 +782,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
index e2c71c1d708a5..df79f086ba1e0 100644
--- a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
@@ -294,14 +294,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -764,14 +764,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
index eee8e56f46084..b620508a79286 100644
--- a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
@@ -336,14 +336,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -848,14 +848,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp
index 80b27e6df1fb9..f6ed27e73ff70 100644
--- a/clang/test/OpenMP/reduction_implicit_map.cpp
+++ b/clang/test/OpenMP/reduction_implicit_map.cpp
@@ -735,10 +735,10 @@ int main()
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to double*
-// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to double*
+// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to double**
+// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8** [[DOTADDR1]] to double**
+// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr double, double* [[TMP4]], i64 [[TMP2]]
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq double* [[TMP4]], [[TMP7]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
diff --git a/clang/test/OpenMP/sections_reduction_task_codegen.cpp b/clang/test/OpenMP/sections_reduction_task_codegen.cpp
index f6f96d52564f6..f0617e2de96c5 100644
--- a/clang/test/OpenMP/sections_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/sections_reduction_task_codegen.cpp
@@ -341,14 +341,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -857,14 +857,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
index f0c0a902b4140..fdb77b47a27c4 100644
--- a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
@@ -361,14 +361,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -898,14 +898,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
index 24604c1608dfb..ecfde122259f9 100644
--- a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
@@ -307,14 +307,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -790,14 +790,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
index 9ded17d77f9d1..2f09139cfbc98 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -334,14 +334,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -707,14 +707,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -1266,14 +1266,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -1639,14 +1639,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
diff --git a/clang/test/OpenMP/task_in_reduction_codegen.cpp b/clang/test/OpenMP/task_in_reduction_codegen.cpp
index 262ca72d0103e..686e11b9b9c96 100644
--- a/clang/test/OpenMP/task_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/task_in_reduction_codegen.cpp
@@ -248,14 +248,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -279,14 +279,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK1-NEXT: [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store float [[ADD]], float* [[TMP3]], align 8
+// CHECK1-NEXT: store float [[ADD]], float* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -310,14 +310,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -382,10 +382,10 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -466,10 +466,10 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK1-NEXT: [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -919,14 +919,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -950,14 +950,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK2-NEXT: [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store float [[ADD]], float* [[TMP3]], align 8
+// CHECK2-NEXT: store float [[ADD]], float* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -981,14 +981,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -1053,10 +1053,10 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK2-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -1137,10 +1137,10 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK2-NEXT: [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT: [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK2-NEXT: [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
diff --git a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
index 41552686514d3..fa2f8e1ae9a6d 100644
--- a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
@@ -224,14 +224,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -255,14 +255,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK1-NEXT: [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store float [[ADD]], float* [[TMP3]], align 8
+// CHECK1-NEXT: store float [[ADD]], float* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -286,14 +286,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -358,10 +358,10 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -442,10 +442,10 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK1-NEXT: [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -894,14 +894,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -925,14 +925,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK2-NEXT: [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store float [[ADD]], float* [[TMP3]], align 8
+// CHECK2-NEXT: store float [[ADD]], float* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -956,14 +956,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -1028,10 +1028,10 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK2-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -1112,10 +1112,10 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK2-NEXT: [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT: [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK2-NEXT: [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
diff --git a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
index 23c24fc97b47a..9a8daed3a72c7 100644
--- a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
@@ -224,14 +224,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -255,14 +255,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK1-NEXT: [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store float [[ADD]], float* [[TMP3]], align 8
+// CHECK1-NEXT: store float [[ADD]], float* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -286,14 +286,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -358,10 +358,10 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK1-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK1-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -442,10 +442,10 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT: [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK1-NEXT: [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -894,14 +894,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -925,14 +925,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT: [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK2-NEXT: [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store float [[ADD]], float* [[TMP3]], align 8
+// CHECK2-NEXT: store float [[ADD]], float* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -956,14 +956,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -1028,10 +1028,10 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK2-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK2-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -1112,10 +1112,10 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK2-NEXT: [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT: [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK2-NEXT: [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
// CHECK2-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
// CHECK2-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
index 489ff2f8a2927..b5badb9acd863 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -340,14 +340,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -713,14 +713,14 @@ int main(int argc, char **argv) {
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK1-NEXT: ret void
//
//
@@ -1277,14 +1277,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
@@ -1650,14 +1650,14 @@ int main(int argc, char **argv) {
// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8
// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8
// CHECK2-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP3]], align 4
// CHECK2-NEXT: ret void
//
//
More information about the cfe-commits mailing list