[clang] 52cc65d - [OpenMPRuntime] Specify correct pointer type

Nikita Popov via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 18 06:26:06 PDT 2022


Author: Nikita Popov
Date: 2022-03-18T14:25:51+01:00
New Revision: 52cc65d4740992a29ae8375498c3b470f003de26

URL: https://github.com/llvm/llvm-project/commit/52cc65d4740992a29ae8375498c3b470f003de26
DIFF: https://github.com/llvm/llvm-project/commit/52cc65d4740992a29ae8375498c3b470f003de26.diff

LOG: [OpenMPRuntime] Specify correct pointer type

Rather than specifying a dummy type in EmitLoadOfPointer() and
then casting it to the correct one, we should instead specify the
correct type and cast beforehand. Otherwise the computed alignment
will be incorrect.

Added: 
    

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
    clang/test/OpenMP/for_reduction_task_codegen.cpp
    clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
    clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
    clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
    clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
    clang/test/OpenMP/parallel_reduction_task_codegen.cpp
    clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
    clang/test/OpenMP/reduction_implicit_map.cpp
    clang/test/OpenMP/sections_reduction_task_codegen.cpp
    clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
    clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
    clang/test/OpenMP/task_in_reduction_codegen.cpp
    clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
    clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 1192d194f5865..840775308684b 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -5993,19 +5993,19 @@ static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
   PrivateScope.addPrivate(
       LHSVD,
       // Pull out the pointer to the variable.
-      CGF.Builder.CreateElementBitCast(
-          CGF.EmitLoadOfPointer(
+      CGF.EmitLoadOfPointer(
+          CGF.Builder.CreateElementBitCast(
               CGF.GetAddrOfLocalVar(&ParamInOut),
-              C.getPointerType(C.VoidPtrTy).castAs<PointerType>()),
-          CGF.ConvertTypeForMem(LHSVD->getType())));
+              CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
+          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
   PrivateScope.addPrivate(
       RHSVD,
       // Pull out the pointer to the variable.
-      CGF.Builder.CreateElementBitCast(
-          CGF.EmitLoadOfPointer(
-              CGF.GetAddrOfLocalVar(&ParamIn),
-              C.getPointerType(C.VoidPtrTy).castAs<PointerType>()),
-          CGF.ConvertTypeForMem(RHSVD->getType())));
+      CGF.EmitLoadOfPointer(
+          CGF.Builder.CreateElementBitCast(
+            CGF.GetAddrOfLocalVar(&ParamIn),
+            CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
+          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
   PrivateScope.Privatize();
   // Emit the combiner body:
   // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)

diff  --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
index 5b00d194d1be1..b2fbf617069ed 100644
--- a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
@@ -444,14 +444,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1063,14 +1063,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //

diff  --git a/clang/test/OpenMP/for_reduction_task_codegen.cpp b/clang/test/OpenMP/for_reduction_task_codegen.cpp
index 7fc4b8fedeef4..51c47d6e42d81 100644
--- a/clang/test/OpenMP/for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/for_reduction_task_codegen.cpp
@@ -353,14 +353,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -881,14 +881,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //

diff  --git a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
index cb67c0b4b23b2..1dfc66fb95839 100644
--- a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
@@ -224,14 +224,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -255,14 +255,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store float [[ADD]], float* [[TMP3]], align 8
+// CHECK1-NEXT:    store float [[ADD]], float* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -286,14 +286,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -358,10 +358,10 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
 // CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
 // CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -442,10 +442,10 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
 // CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK1-NEXT:    [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK1-NEXT:    [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
 // CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
 // CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -901,14 +901,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -932,14 +932,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store float [[ADD]], float* [[TMP3]], align 8
+// CHECK2-NEXT:    store float [[ADD]], float* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -963,14 +963,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -1035,10 +1035,10 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
 // CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
 // CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
 // CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -1119,10 +1119,10 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
 // CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
 // CHECK2-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK2-NEXT:    [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK2-NEXT:    [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
 // CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
 // CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
 // CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]

diff  --git a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
index 590daa75f3702..9c23a42584b05 100644
--- a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
@@ -224,14 +224,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -255,14 +255,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store float [[ADD]], float* [[TMP3]], align 8
+// CHECK1-NEXT:    store float [[ADD]], float* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -286,14 +286,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -358,10 +358,10 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
 // CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
 // CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -442,10 +442,10 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
 // CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK1-NEXT:    [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK1-NEXT:    [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
 // CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
 // CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -901,14 +901,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -932,14 +932,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store float [[ADD]], float* [[TMP3]], align 8
+// CHECK2-NEXT:    store float [[ADD]], float* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -963,14 +963,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -1035,10 +1035,10 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
 // CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
 // CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
 // CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -1119,10 +1119,10 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
 // CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
 // CHECK2-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK2-NEXT:    [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK2-NEXT:    [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
 // CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
 // CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
 // CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]

diff  --git a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
index 0b78b407dd075..173247d360826 100644
--- a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
@@ -348,14 +348,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -872,14 +872,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //

diff  --git a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
index df366df3cdec5..a39afae3156fd 100644
--- a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
@@ -303,14 +303,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -782,14 +782,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //

diff  --git a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
index e2c71c1d708a5..df79f086ba1e0 100644
--- a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
@@ -294,14 +294,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -764,14 +764,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //

diff  --git a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
index eee8e56f46084..b620508a79286 100644
--- a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
@@ -336,14 +336,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -848,14 +848,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //

diff  --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp
index 80b27e6df1fb9..f6ed27e73ff70 100644
--- a/clang/test/OpenMP/reduction_implicit_map.cpp
+++ b/clang/test/OpenMP/reduction_implicit_map.cpp
@@ -735,10 +735,10 @@ int main()
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
 // CHECK1-NEXT:    [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to double*
-// CHECK1-NEXT:    [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP6:%.*]] = bitcast i8* [[TMP5]] to double*
+// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to double**
+// CHECK1-NEXT:    [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8
+// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8** [[DOTADDR1]] to double**
+// CHECK1-NEXT:    [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8
 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr double, double* [[TMP4]], i64 [[TMP2]]
 // CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq double* [[TMP4]], [[TMP7]]
 // CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]

diff  --git a/clang/test/OpenMP/sections_reduction_task_codegen.cpp b/clang/test/OpenMP/sections_reduction_task_codegen.cpp
index f6f96d52564f6..f0617e2de96c5 100644
--- a/clang/test/OpenMP/sections_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/sections_reduction_task_codegen.cpp
@@ -341,14 +341,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -857,14 +857,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //

diff  --git a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
index f0c0a902b4140..fdb77b47a27c4 100644
--- a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
@@ -361,14 +361,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -898,14 +898,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //

diff  --git a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
index 24604c1608dfb..ecfde122259f9 100644
--- a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
@@ -307,14 +307,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -790,14 +790,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
index 9ded17d77f9d1..2f09139cfbc98 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -334,14 +334,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -707,14 +707,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1266,14 +1266,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -1639,14 +1639,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //

diff  --git a/clang/test/OpenMP/task_in_reduction_codegen.cpp b/clang/test/OpenMP/task_in_reduction_codegen.cpp
index 262ca72d0103e..686e11b9b9c96 100644
--- a/clang/test/OpenMP/task_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/task_in_reduction_codegen.cpp
@@ -248,14 +248,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -279,14 +279,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store float [[ADD]], float* [[TMP3]], align 8
+// CHECK1-NEXT:    store float [[ADD]], float* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -310,14 +310,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -382,10 +382,10 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
 // CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
 // CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -466,10 +466,10 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
 // CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK1-NEXT:    [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK1-NEXT:    [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
 // CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
 // CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -919,14 +919,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -950,14 +950,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store float [[ADD]], float* [[TMP3]], align 8
+// CHECK2-NEXT:    store float [[ADD]], float* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -981,14 +981,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -1053,10 +1053,10 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
 // CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
 // CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
 // CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -1137,10 +1137,10 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
 // CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
 // CHECK2-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK2-NEXT:    [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK2-NEXT:    [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
 // CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
 // CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
 // CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]

diff  --git a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
index 41552686514d3..fa2f8e1ae9a6d 100644
--- a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
@@ -224,14 +224,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -255,14 +255,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store float [[ADD]], float* [[TMP3]], align 8
+// CHECK1-NEXT:    store float [[ADD]], float* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -286,14 +286,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -358,10 +358,10 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
 // CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
 // CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -442,10 +442,10 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
 // CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK1-NEXT:    [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK1-NEXT:    [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
 // CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
 // CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -894,14 +894,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -925,14 +925,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store float [[ADD]], float* [[TMP3]], align 8
+// CHECK2-NEXT:    store float [[ADD]], float* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -956,14 +956,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -1028,10 +1028,10 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
 // CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
 // CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
 // CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -1112,10 +1112,10 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
 // CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
 // CHECK2-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK2-NEXT:    [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK2-NEXT:    [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
 // CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
 // CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
 // CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]

diff  --git a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
index 23c24fc97b47a..9a8daed3a72c7 100644
--- a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
@@ -224,14 +224,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -255,14 +255,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store float [[ADD]], float* [[TMP3]], align 8
+// CHECK1-NEXT:    store float [[ADD]], float* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -286,14 +286,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -358,10 +358,10 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
 // CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
 // CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -442,10 +442,10 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
 // CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
 // CHECK1-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK1-NEXT:    [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK1-NEXT:    [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK1-NEXT:    [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
 // CHECK1-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
 // CHECK1-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -894,14 +894,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -925,14 +925,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to float*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to float*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to float**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load float*, float** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to float**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load float*, float** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store float [[ADD]], float* [[TMP3]], align 8
+// CHECK2-NEXT:    store float [[ADD]], float* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -956,14 +956,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -1028,10 +1028,10 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[REF_TMP:%.*]] = alloca [[STRUCT_S:%.*]], align 4
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct.S*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.S*
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to %struct.S**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load %struct.S*, %struct.S** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to %struct.S**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP4]], align 8
 // CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr [[STRUCT_S]], %struct.S* [[TMP3]], i64 5
 // CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[TMP3]], [[TMP6]]
 // CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
@@ -1112,10 +1112,10 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[TMP3:%.*]] = call i8* @__kmpc_threadprivate_cached(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* bitcast (i64* @{{reduction_size[.].+[.]}})
 // CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8* [[TMP3]] to i64*
 // CHECK2-NEXT:    [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i16*
-// CHECK2-NEXT:    [[TMP8:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i16*
+// CHECK2-NEXT:    [[TMP6:%.*]] = bitcast i8** [[DOTADDR]] to i16**
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i16*, i16** [[TMP6]], align 8
+// CHECK2-NEXT:    [[TMP8:%.*]] = bitcast i8** [[DOTADDR1]] to i16**
+// CHECK2-NEXT:    [[TMP9:%.*]] = load i16*, i16** [[TMP8]], align 8
 // CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr i16, i16* [[TMP7]], i64 [[TMP5]]
 // CHECK2-NEXT:    [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i16* [[TMP7]], [[TMP10]]
 // CHECK2-NEXT:    br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
index 489ff2f8a2927..b5badb9acd863 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -340,14 +340,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -713,14 +713,14 @@ int main(int argc, char **argv) {
 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK1-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK1-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK1-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK1-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK1-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK1-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK1-NEXT:    ret void
 //
 //
@@ -1277,14 +1277,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //
@@ -1650,14 +1650,14 @@ int main(int argc, char **argv) {
 // CHECK2-NEXT:    [[DOTADDR1:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    store i8* [[TMP0]], i8** [[DOTADDR]], align 8
 // CHECK2-NEXT:    store i8* [[TMP1]], i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8
-// CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i32*
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8
-// CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i32*
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 8
-// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 8
+// CHECK2-NEXT:    [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to i32**
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[TMP2]], align 8
+// CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i8** [[DOTADDR1]] to i32**
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK2-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
 // CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], [[TMP7]]
-// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 8
+// CHECK2-NEXT:    store i32 [[ADD]], i32* [[TMP3]], align 4
 // CHECK2-NEXT:    ret void
 //
 //


        


More information about the cfe-commits mailing list