[clang] e9bfa1d - [OPENMP]Use new interface for task reduction.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Fri Apr 24 06:53:46 PDT 2020


Author: Alexey Bataev
Date: 2020-04-24T09:41:48-04:00
New Revision: e9bfa1dd38d758cce80d12fd073117640f3c3bfd

URL: https://github.com/llvm/llvm-project/commit/e9bfa1dd38d758cce80d12fd073117640f3c3bfd
DIFF: https://github.com/llvm/llvm-project/commit/e9bfa1dd38d758cce80d12fd073117640f3c3bfd.diff

LOG: [OPENMP]Use new interface for task reduction.

Summary:
Patch forces codegen to use the new runtime functions for task reductions, in
which the issue with passing the address of the original variables to the UDR
initializers is fixed. Also, this patch is required for the upcoming
support of the task modifier in the reduction clause.

Reviewers: jdoerfert

Subscribers: yaxunl, guansong, cfe-commits, caomhin

Tags: #clang

Differential Revision: https://reviews.llvm.org/D78733

Added: 
    

Modified: 
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/lib/CodeGen/CGOpenMPRuntime.h
    clang/lib/CodeGen/CGStmtOpenMP.cpp
    clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
    clang/test/OpenMP/master_taskloop_reduction_codegen.cpp
    clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
    clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp
    clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp
    clang/test/OpenMP/task_in_reduction_codegen.cpp
    clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp
    clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
    clang/test/OpenMP/taskloop_reduction_codegen.cpp
    clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
    clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index e6520903c783..4ad276e40c99 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -705,9 +705,8 @@ enum OpenMPRTLFunction {
   // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
   // *vec);
   OMPRTL__kmpc_doacross_wait,
-  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
-  // *data);
-  OMPRTL__kmpc_task_reduction_init,
+  // Call to void *__kmpc_taskred_init(int gtid, int num_data, void *data);
+  OMPRTL__kmpc_taskred_init,
   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
   // *d);
   OMPRTL__kmpc_task_reduction_get_th_data,
@@ -981,27 +980,37 @@ void ReductionCodeGen::emitAggregateInitialization(
 }
 
 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
+                                   ArrayRef<const Expr *> Origs,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> ReductionOps) {
   ClausesData.reserve(Shareds.size());
   SharedAddresses.reserve(Shareds.size());
   Sizes.reserve(Shareds.size());
   BaseDecls.reserve(Shareds.size());
-  auto IPriv = Privates.begin();
-  auto IRed = ReductionOps.begin();
+  const auto *IOrig = Origs.begin();
+  const auto *IPriv = Privates.begin();
+  const auto *IRed = ReductionOps.begin();
   for (const Expr *Ref : Shareds) {
-    ClausesData.emplace_back(Ref, *IPriv, *IRed);
+    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
+    std::advance(IOrig, 1);
     std::advance(IPriv, 1);
     std::advance(IRed, 1);
   }
 }
 
-void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
-  assert(SharedAddresses.size() == N &&
+void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
+  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
          "Number of generated lvalues must be exactly N.");
-  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
-  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
+  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
+  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
   SharedAddresses.emplace_back(First, Second);
+  if (ClausesData[N].Shared == ClausesData[N].Ref) {
+    OrigAddresses.emplace_back(First, Second);
+  } else {
+    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
+    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
+    OrigAddresses.emplace_back(First, Second);
+  }
 }
 
 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
@@ -2318,14 +2327,12 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
     break;
   }
-  case OMPRTL__kmpc_task_reduction_init: {
-    // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
-    // *data);
+  case OMPRTL__kmpc_taskred_init: {
+    // Build void *__kmpc_taskred_init(int gtid, int num_data, void *data);
     llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
     auto *FnTy =
         llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
-    RTLFn =
-        CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskred_init");
     break;
   }
   case OMPRTL__kmpc_task_reduction_get_th_data: {
@@ -6546,7 +6553,7 @@ static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
 
 /// Emits reduction initializer function:
 /// \code
-/// void @.red_init(void* %arg) {
+/// void @.red_init(void* %arg, void* %orig) {
 /// %0 = bitcast void* %arg to <type>*
 /// store <type> <init>, <type>* %0
 /// ret void
@@ -6556,10 +6563,15 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            ReductionCodeGen &RCG, unsigned N) {
   ASTContext &C = CGM.getContext();
+  QualType VoidPtrTy = C.VoidPtrTy;
+  VoidPtrTy.addRestrict();
   FunctionArgList Args;
-  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                           ImplicitParamDecl::Other);
+  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
+                              ImplicitParamDecl::Other);
   Args.emplace_back(&Param);
+  Args.emplace_back(&ParamOrig);
   const auto &FnInfo =
       CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
   llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
@@ -6584,28 +6596,25 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                 CGM.getContext().getSizeType(), Loc);
   }
   RCG.emitAggregateType(CGF, N, Size);
-  LValue SharedLVal;
+  LValue OrigLVal;
   // If initializer uses initializer from declare reduction construct, emit a
   // pointer to the address of the original reduction item (reuired by reduction
   // initializer)
   if (RCG.usesReductionInitializer(N)) {
-    Address SharedAddr =
-        CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
-            CGF, CGM.getContext().VoidPtrTy,
-            generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
+    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
     SharedAddr = CGF.EmitLoadOfPointer(
         SharedAddr,
         CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
-    SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
+    OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
   } else {
-    SharedLVal = CGF.MakeNaturalAlignAddrLValue(
+    OrigLVal = CGF.MakeNaturalAlignAddrLValue(
         llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
         CGM.getContext().VoidPtrTy);
   }
   // Emit the initializer:
   // %0 = bitcast void* %arg to <type>*
   // store <type> <init>, <type>* %0
-  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
+  RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal,
                          [](CodeGenFunction &) { return false; });
   CGF.FinishFunction();
   return Fn;
@@ -6745,18 +6754,20 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
     return nullptr;
 
   // Build typedef struct:
-  // kmp_task_red_input {
+  // kmp_taskred_input {
   //   void *reduce_shar; // shared reduction item
+  //   void *reduce_orig; // original reduction item used for initialization
   //   size_t reduce_size; // size of data item
   //   void *reduce_init; // data initialization routine
   //   void *reduce_fini; // data finalization routine
   //   void *reduce_comb; // data combiner routine
   //   kmp_task_red_flags_t flags; // flags for additional info from compiler
-  // } kmp_task_red_input_t;
+  // } kmp_taskred_input_t;
   ASTContext &C = CGM.getContext();
-  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
+  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
   RD->startDefinition();
   const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
+  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
   const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
   const FieldDecl *InitFD  = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
   const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
@@ -6771,8 +6782,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
   // kmp_task_red_input_t .rd_input.[Size];
   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
-  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
-                       Data.ReductionOps);
+  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionVars,
+                       Data.ReductionCopies, Data.ReductionOps);
   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
     llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
@@ -6784,20 +6795,24 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
     LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
     // ElemLVal.reduce_shar = &Shareds[Cnt];
     LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
-    RCG.emitSharedLValue(CGF, Cnt);
+    RCG.emitSharedOrigLValue(CGF, Cnt);
     llvm::Value *CastedShared =
         CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
     CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
+    // ElemLVal.reduce_orig = &Origs[Cnt];
+    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
+    llvm::Value *CastedOrig =
+        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
+    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
     RCG.emitAggregateType(CGF, Cnt);
     llvm::Value *SizeValInChars;
     llvm::Value *SizeVal;
     std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
-    // We use delayed creation/initialization for VLAs, array sections and
-    // custom reduction initializations. It is required because runtime does not
-    // provide the way to pass the sizes of VLAs/array sections to
-    // initializer/combiner/finalizer functions and does not pass the pointer to
-    // original reduction item to the initializer. Instead threadprivate global
-    // variables are used to store these values and use them in the functions.
+    // We use delayed creation/initialization for VLAs and array sections. It is
+    // required because runtime does not provide the way to pass the sizes of
+    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
+    // threadprivate global variables are used to store these values and use
+    // them in the functions.
     bool DelayedCreation = !!SizeVal;
     SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                                /*isSigned=*/false);
@@ -6808,7 +6823,6 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
     llvm::Value *InitAddr =
         CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
     CGF.EmitStoreOfScalar(InitAddr, InitLVal);
-    DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
     // ElemLVal.reduce_fini = fini;
     LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
     llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
@@ -6832,16 +6846,15 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                  FlagsLVal.getType());
   }
-  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
-  // *data);
+  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
   llvm::Value *Args[] = {
       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                 /*isSigned=*/true),
       llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
       CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                       CGM.VoidPtrTy)};
-  return CGF.EmitRuntimeCall(
-      createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
+  return CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskred_init),
+                             Args);
 }
 
 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
@@ -6859,16 +6872,6 @@ void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
         generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
     CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
   }
-  // Store address of the original reduction item if custom initializer is used.
-  if (RCG.usesReductionInitializer(N)) {
-    Address SharedAddr = getAddrOfArtificialThreadPrivate(
-        CGF, CGM.getContext().VoidPtrTy,
-        generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
-    CGF.Builder.CreateStore(
-        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-            RCG.getSharedLValue(N).getPointer(CGF), CGM.VoidPtrTy),
-        SharedAddr, /*IsVolatile=*/false);
-  }
 }
 
 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,

diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index e0efa5bd95df..830b1edc61f4 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -125,20 +125,26 @@ class ReductionCodeGen {
 private:
   /// Data required for codegen of reduction clauses.
   struct ReductionData {
-    /// Reference to the original shared item.
+    /// Reference to the item shared between tasks to reduce into.
+    const Expr *Shared = nullptr;
+    /// Reference to the original item.
     const Expr *Ref = nullptr;
     /// Helper expression for generation of private copy.
     const Expr *Private = nullptr;
     /// Helper expression for generation reduction operation.
     const Expr *ReductionOp = nullptr;
-    ReductionData(const Expr *Ref, const Expr *Private, const Expr *ReductionOp)
-        : Ref(Ref), Private(Private), ReductionOp(ReductionOp) {}
+    ReductionData(const Expr *Shared, const Expr *Ref, const Expr *Private,
+                  const Expr *ReductionOp)
+        : Shared(Shared), Ref(Ref), Private(Private), ReductionOp(ReductionOp) {
+    }
   };
   /// List of reduction-based clauses.
   SmallVector<ReductionData, 4> ClausesData;
 
-  /// List of addresses of original shared variables/expressions.
+  /// List of addresses of shared variables/expressions.
   SmallVector<std::pair<LValue, LValue>, 4> SharedAddresses;
+  /// List of addresses of original variables/expressions.
+  SmallVector<std::pair<LValue, LValue>, 4> OrigAddresses;
   /// Sizes of the reduction items in chars.
   SmallVector<std::pair<llvm::Value *, llvm::Value *>, 4> Sizes;
   /// Base declarations for the reduction items.
@@ -158,12 +164,12 @@ class ReductionCodeGen {
                                    const OMPDeclareReductionDecl *DRD);
 
 public:
-  ReductionCodeGen(ArrayRef<const Expr *> Shareds,
+  ReductionCodeGen(ArrayRef<const Expr *> Shareds, ArrayRef<const Expr *> Origs,
                    ArrayRef<const Expr *> Privates,
                    ArrayRef<const Expr *> ReductionOps);
-  /// Emits lvalue for a reduction item.
+  /// Emits lvalue for the shared and original reduction item.
   /// \param N Number of the reduction item.
-  void emitSharedLValue(CodeGenFunction &CGF, unsigned N);
+  void emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N);
   /// Emits the code for the variable-modified type, if required.
   /// \param N Number of the reduction item.
   void emitAggregateType(CodeGenFunction &CGF, unsigned N);
@@ -195,6 +201,8 @@ class ReductionCodeGen {
                                Address PrivateAddr);
   /// Returns LValue for the reduction item.
   LValue getSharedLValue(unsigned N) const { return SharedAddresses[N].first; }
+  /// Returns LValue for the original reduction item.
+  LValue getOrigLValue(unsigned N) const { return OrigAddresses[N].first; }
   /// Returns the size of the reduction item (in chars and total number of
   /// elements in the item), or nullptr, if the size is a constant.
   std::pair<llvm::Value *, llvm::Value *> getSizes(unsigned N) const {
@@ -1434,9 +1442,7 @@ class CGOpenMPRuntime {
 
   /// Required to resolve existing problems in the runtime. Emits threadprivate
   /// variables to store the size of the VLAs/array sections for
-  /// initializer/combiner/finalizer functions + emits threadprivate variable to
-  /// store the pointer to the original reduction item for the custom
-  /// initializer defined by declare reduction construct.
+  /// initializer/combiner/finalizer functions.
   /// \param RCG Allows to reuse an existing data for the reductions.
   /// \param N Reduction item for which fixups must be emitted.
   virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc,

diff  --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 8366bbe73b5a..cbc08ac2efb4 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1186,15 +1186,15 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
       std::advance(IRHS, 1);
     }
   }
-  ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
+  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
   unsigned Count = 0;
-  auto ILHS = LHSs.begin();
-  auto IRHS = RHSs.begin();
-  auto IPriv = Privates.begin();
+  auto *ILHS = LHSs.begin();
+  auto *IRHS = RHSs.begin();
+  auto *IPriv = Privates.begin();
   for (const Expr *IRef : Shareds) {
     const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
     // Emit private VarDecl with reduction init.
-    RedCG.emitSharedLValue(*this, Count);
+    RedCG.emitSharedOrigLValue(*this, Count);
     RedCG.emitAggregateType(*this, Count);
     AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
     RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
@@ -3478,12 +3478,12 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
       }
       (void)FirstprivateScope.Privatize();
       OMPLexicalScope LexScope(CGF, S, CapturedRegion);
-      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
-                             Data.ReductionOps);
+      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
+                             Data.ReductionCopies, Data.ReductionOps);
       llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
           CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
-        RedCG.emitSharedLValue(CGF, Cnt);
+        RedCG.emitSharedOrigLValue(CGF, Cnt);
         RedCG.emitAggregateType(CGF, Cnt);
         // FIXME: This must removed once the runtime library is fixed.
         // Emit required threadprivate variables for
@@ -3528,9 +3528,9 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
     // privatized earlier.
     OMPPrivateScope InRedScope(CGF);
     if (!InRedVars.empty()) {
-      ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
+      ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
       for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
-        RedCG.emitSharedLValue(CGF, Cnt);
+        RedCG.emitSharedOrigLValue(CGF, Cnt);
         RedCG.emitAggregateType(CGF, Cnt);
         // The taskgroup descriptor variable is always implicit firstprivate and
         // privatized already during processing of the firstprivates.

diff  --git a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
index 8cff588a87b4..5d6d1645408f 100644
--- a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp
@@ -40,10 +40,10 @@ int main(int argc, char **argv) {
 
 // CHECK-LABEL: @main
 // CHECK:       void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID:%.+]])
-// CHECK:       [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* %
+// CHECK:       [[TD1:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 3, i8* %
 // CHECK-NEXT:  store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]],
 // CHECK-NEXT:  call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID]])
-// CHECK:       [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* %
+// CHECK:       [[TD2:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 2, i8* %
 // CHECK-NEXT:  store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]],
 // CHECK-NEXT:  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]])
 // CHECK-NEXT:  call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]])

diff  --git a/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp
index 47cf92f03489..eefa0bc68914 100644
--- a/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp
+++ b/clang/test/OpenMP/master_taskloop_reduction_codegen.cpp
@@ -52,7 +52,7 @@ sum = 0.0;
 // CHECK:    [[C:%.*]] = alloca [100 x %struct.S],
 // CHECK:    [[D:%.*]] = alloca float*,
 // CHECK:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]],
-// CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t],
+// CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_taskred_input_t],
 // CHECK:    alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32,
@@ -72,16 +72,19 @@ sum = 0.0;
 // CHECK:    call void @__kmpc_taskgroup(%struct.ident_t*
 // CHECK-DAG:    [[TMP21:%.*]] = bitcast float* [[SUM]] to i8*
 // CHECK-DAG:    store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
-// CHECK-DAG:    [[TMP20]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T:%.+]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 0
-// CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK-DAG:    [[TMP20]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP21:%.*]] = bitcast float* [[SUM]] to i8*
+// CHECK-DAG:    store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
+// CHECK-DAG:    [[TMP20]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
 // CHECK-DAG:    store i64 4, i64* [[TMP22]],
-// CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
-// CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
+// CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP24]],
-// CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
+// CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB1:.+]] to i8*), i8** [[TMP25]],
-// CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
+// CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK-DAG:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8*
 // CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 0
@@ -89,7 +92,10 @@ sum = 0.0;
 // CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 [[LB_ADD_LEN]]
 // CHECK-DAG:    [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8*
 // CHECK-DAG:    store i8* [[TMP31]], i8** [[TMP28:%[^,]+]],
-// CHECK-DAG:    [[TMP28]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP28]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8*
+// CHECK-DAG:    store i8* [[TMP31]], i8** [[TMP28:%[^,]+]],
+// CHECK-DAG:    [[TMP28]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
 // CHECK-DAG:    [[TMP32:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64
 // CHECK-DAG:    [[TMP33:%.*]] = ptrtoint %struct.S* [[ARRAYIDX5]] to i64
 // CHECK-DAG:    [[TMP34:%.*]] = sub i64 [[TMP32]], [[TMP33]]
@@ -97,51 +103,57 @@ sum = 0.0;
 // CHECK-DAG:    [[TMP36:%.*]] = add nuw i64 [[TMP35]], 1
 // CHECK-DAG:    [[TMP37:%.*]] = mul nuw i64 [[TMP36]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
 // CHECK-DAG:    store i64 [[TMP37]], i64* [[TMP38:%[^,]+]],
-// CHECK-DAG:    [[TMP38]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
-// CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
-// CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
+// CHECK-DAG:    [[TMP38]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
+// CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
+// CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
 // CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_FINI2:.+]] to i8*), i8** [[TMP40]],
-// CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
+// CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB2:.+]] to i8*), i8** [[TMP41]],
-// CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
+// CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, i32* [[TMP42]],
 // CHECK-DAG:    [[TMP44:%.*]] = load float*, float** [[D]],
 // CHECK-DAG:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8*
 // CHECK-DAG:    store i8* [[TMP45]], i8** [[TMP43:%[^,]+]],
-// CHECK-DAG:    [[TMP43]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 0
-// CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
+// CHECK-DAG:    [[TMP43]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8*
+// CHECK-DAG:    store i8* [[TMP45]], i8** [[TMP43:%[^,]+]],
+// CHECK-DAG:    [[TMP43]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
+// CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
 // CHECK-DAG:    store i64 4, i64* [[TMP46]],
-// CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
-// CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
+// CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
+// CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP48]],
-// CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
+// CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB3:.+]] to i8*), i8** [[TMP49]],
-// CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
+// CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 6
 // CHECK-DAG:    [[TMP51:%.*]] = bitcast i32* [[TMP50]] to i8*
 // CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP51]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA]] to i8*
 // CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
-// CHECK-DAG:    [[TMP52]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP52]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA]] to i8*
+// CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
+// CHECK-DAG:    [[TMP52]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1
 // CHECK-DAG:    [[TMP54:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK-DAG:    [[TMP55:%.*]] = udiv exact i64 [[TMP54]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
 // CHECK-DAG:    store i64 [[TMP54]], i64* [[TMP56:%[^,]+]],
-// CHECK-DAG:    [[TMP56]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1
-// CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
-// CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
+// CHECK-DAG:    [[TMP56]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
+// CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
+// CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP58]],
-// CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
+// CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB4:.+]] to i8*), i8** [[TMP59]],
-// CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
+// CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, i32* [[TMP60]],
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK:    [[TMP61:%.*]] = bitcast [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]] to i8*
-// CHECK:    [[TMP62:%.*]] = call i8* @__kmpc_task_reduction_init(i32 [[TMP0]], i32 4, i8* [[TMP61]])
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK:    [[TMP61:%.*]] = bitcast [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8*
+// CHECK:    [[TMP62:%.*]] = call i8* @__kmpc_taskred_init(i32 [[TMP0]], i32 4, i8* [[TMP61]])
 // CHECK:    [[TMP63:%.*]] = load i32, i32* [[N]],
 // CHECK:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_]],
 // CHECK:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]],
@@ -160,7 +172,7 @@ sum = 0.0;
 
 // CHECK:    ret i32
 
-// CHECK: define internal void @[[RED_INIT1]](i8* %0)
+// CHECK: define internal void @[[RED_INIT1]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
@@ -169,11 +181,9 @@ sum = 0.0;
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT2]](i8* %0)
+// CHECK: define internal void @[[RED_INIT2]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: call i8* @__kmpc_threadprivate_cached(
-// CHECK: [[ORIG_PTR_ADDR:%.+]] = call i8* @__kmpc_threadprivate_cached(
-// CHECK: [[ORIG_PTR_REF:%.+]] = bitcast i8* [[ORIG_PTR_ADDR]] to i8**
-// CHECK: load i8*, i8** [[ORIG_PTR_REF]],
+// CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call void [[OMP_INIT1:@.+]](
 // CHECK: ret void
 
@@ -193,7 +203,7 @@ sum = 0.0;
 // CHECK: call void [[OMP_COMB1]](
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT3]](i8* %0)
+// CHECK: define internal void @[[RED_INIT3]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
@@ -202,7 +212,7 @@ sum = 0.0;
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT4]](i8* %0)
+// CHECK: define internal void @[[RED_INIT4]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
@@ -216,7 +226,6 @@ sum = 0.0;
 // CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call i8* @__kmpc_task_reduction_get_th_data(
 // CHECK: call i8* @__kmpc_threadprivate_cached(
-// CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call i8* @__kmpc_task_reduction_get_th_data(
 // CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call i8* @__kmpc_task_reduction_get_th_data(

diff  --git a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
index 7555d6563136..58f1b0e034b0 100644
--- a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp
@@ -40,10 +40,10 @@ int main(int argc, char **argv) {
 
 // CHECK-LABEL: @main
 // CHECK:       void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID:%.+]])
-// CHECK:       [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* %
+// CHECK:       [[TD1:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 3, i8* %
 // CHECK-NEXT:  store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]],
 // CHECK-NEXT:  call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID]])
-// CHECK:       [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* %
+// CHECK:       [[TD2:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 2, i8* %
 // CHECK-NEXT:  store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]],
 // CHECK-NEXT:  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]])
 // CHECK-NEXT:  call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]])

diff  --git a/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp
index 7eb7844dfda2..d6aeb5fce0bc 100644
--- a/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/master_taskloop_simd_reduction_codegen.cpp
@@ -52,7 +52,7 @@ sum = 0.0;
 // CHECK:    [[C:%.*]] = alloca [100 x %struct.S],
 // CHECK:    [[D:%.*]] = alloca float*,
 // CHECK:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]],
-// CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t],
+// CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_taskred_input_t],
 // CHECK:    alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32,
@@ -68,16 +68,19 @@ sum = 0.0;
 // CHECK:    call void @__kmpc_taskgroup(%struct.ident_t*
 // CHECK-DAG:    [[TMP21:%.*]] = bitcast float* [[SUM]] to i8*
 // CHECK-DAG:    store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
-// CHECK-DAG:    [[TMP20]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T:%.+]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 0
-// CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK-DAG:    [[TMP20]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP21:%.*]] = bitcast float* [[SUM]] to i8*
+// CHECK-DAG:    store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
+// CHECK-DAG:    [[TMP20]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 1
+// CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
 // CHECK-DAG:    store i64 4, i64* [[TMP22]],
-// CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
-// CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
+// CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP24]],
-// CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
+// CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB1:.+]] to i8*), i8** [[TMP25]],
-// CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
+// CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK-DAG:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8*
 // CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 0
@@ -85,7 +88,10 @@ sum = 0.0;
 // CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 [[LB_ADD_LEN]]
 // CHECK-DAG:    [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8*
 // CHECK-DAG:    store i8* [[TMP31]], i8** [[TMP28:%[^,]+]],
-// CHECK-DAG:    [[TMP28]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP28]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8*
+// CHECK-DAG:    store i8* [[TMP31]], i8** [[TMP28:%[^,]+]],
+// CHECK-DAG:    [[TMP28]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 1
 // CHECK-DAG:    [[TMP32:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64
 // CHECK-DAG:    [[TMP33:%.*]] = ptrtoint %struct.S* [[ARRAYIDX5]] to i64
 // CHECK-DAG:    [[TMP34:%.*]] = sub i64 [[TMP32]], [[TMP33]]
@@ -93,51 +99,57 @@ sum = 0.0;
 // CHECK-DAG:    [[TMP36:%.*]] = add nuw i64 [[TMP35]], 1
 // CHECK-DAG:    [[TMP37:%.*]] = mul nuw i64 [[TMP36]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
 // CHECK-DAG:    store i64 [[TMP37]], i64* [[TMP38:%[^,]+]],
-// CHECK-DAG:    [[TMP38]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
-// CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
-// CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
+// CHECK-DAG:    [[TMP38]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
+// CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
+// CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
 // CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_FINI2:.+]] to i8*), i8** [[TMP40]],
-// CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
+// CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB2:.+]] to i8*), i8** [[TMP41]],
-// CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
+// CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, i32* [[TMP42]],
 // CHECK-DAG:    [[TMP44:%.*]] = load float*, float** [[D]],
 // CHECK-DAG:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8*
 // CHECK-DAG:    store i8* [[TMP45]], i8** [[TMP43:%[^,]+]],
-// CHECK-DAG:    [[TMP43]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 0
-// CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
+// CHECK-DAG:    [[TMP43]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8*
+// CHECK-DAG:    store i8* [[TMP45]], i8** [[TMP43:%[^,]+]],
+// CHECK-DAG:    [[TMP43]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 1
+// CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
 // CHECK-DAG:    store i64 4, i64* [[TMP46]],
-// CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
-// CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
+// CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
+// CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP48]],
-// CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
+// CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB3:.+]] to i8*), i8** [[TMP49]],
-// CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
+// CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 6
 // CHECK-DAG:    [[TMP51:%.*]] = bitcast i32* [[TMP50]] to i8*
 // CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP51]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA]] to i8*
 // CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
-// CHECK-DAG:    [[TMP52]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP52]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA]] to i8*
+// CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
+// CHECK-DAG:    [[TMP52]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 1
 // CHECK-DAG:    [[TMP54:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK-DAG:    [[TMP55:%.*]] = udiv exact i64 [[TMP54]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
 // CHECK-DAG:    store i64 [[TMP54]], i64* [[TMP56:%[^,]+]],
-// CHECK-DAG:    [[TMP56]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1
-// CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
-// CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
+// CHECK-DAG:    [[TMP56]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
+// CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
+// CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP58]],
-// CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
+// CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB4:.+]] to i8*), i8** [[TMP59]],
-// CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
+// CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, i32* [[TMP60]],
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK:    [[TMP61:%.*]] = bitcast [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]] to i8*
-// CHECK:    [[TMP62:%.*]] = call i8* @__kmpc_task_reduction_init(i32 [[TMP0]], i32 4, i8* [[TMP61]])
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK:    [[TMP61:%.*]] = bitcast [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8*
+// CHECK:    [[TMP62:%.*]] = call i8* @__kmpc_taskred_init(i32 [[TMP0]], i32 4, i8* [[TMP61]])
 // CHECK:    [[TMP63:%.*]] = load i32, i32* [[N]],
 // CHECK:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_]],
 // CHECK:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]],
@@ -153,7 +165,7 @@ sum = 0.0;
 
 // CHECK:    ret i32
 
-// CHECK: define internal void @[[RED_INIT1]](i8* %0)
+// CHECK: define internal void @[[RED_INIT1]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
@@ -162,8 +174,7 @@ sum = 0.0;
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT2]](i8* %0)
-// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: define internal void @[[RED_INIT2]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call void [[OMP_INIT1:@.+]](
 // CHECK: ret void
@@ -184,7 +195,7 @@ sum = 0.0;
 // CHECK: call void [[OMP_COMB1]](
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT3]](i8* %0)
+// CHECK: define internal void @[[RED_INIT3]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
@@ -193,7 +204,7 @@ sum = 0.0;
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT4]](i8* %0)
+// CHECK: define internal void @[[RED_INIT4]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp
index 9b77efe5555d..a8c15909ac61 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_reduction_codegen.cpp
@@ -61,7 +61,7 @@ sum = 0.0;
 
 // CHECK:    [[SUM_ADDR:%.*]] = alloca float*,
 // CHECK:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]],
-// CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t],
+// CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_taskred_input_t],
 // CHECK:    alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32,
@@ -72,16 +72,19 @@ sum = 0.0;
 // CHECK:    call void @__kmpc_taskgroup(%struct.ident_t*
 // CHECK-DAG:    [[TMP21:%.*]] = bitcast float* %{{.+}} to i8*
 // CHECK-DAG:    store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
-// CHECK-DAG:    [[TMP20]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T:%.+]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 0
-// CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK-DAG:    [[TMP20]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP21:%.*]] = bitcast float* %{{.+}} to i8*
+// CHECK-DAG:    store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
+// CHECK-DAG:    [[TMP20]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 1
+// CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
 // CHECK-DAG:    store i64 4, i64* [[TMP22]],
-// CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
-// CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
+// CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP24]],
-// CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
+// CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB1:.+]] to i8*), i8** [[TMP25]],
-// CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
+// CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK-DAG:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8*
 // CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C:%.+]], i64 0, i64 0
@@ -89,7 +92,10 @@ sum = 0.0;
 // CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 [[LB_ADD_LEN]]
 // CHECK-DAG:    [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8*
 // CHECK-DAG:    store i8* [[TMP31]], i8** [[TMP28:%[^,]+]],
-// CHECK-DAG:    [[TMP28]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP28]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8*
+// CHECK-DAG:    store i8* [[TMP31]], i8** [[TMP28:%[^,]+]],
+// CHECK-DAG:    [[TMP28]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 1
 // CHECK-DAG:    [[TMP32:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64
 // CHECK-DAG:    [[TMP33:%.*]] = ptrtoint %struct.S* [[ARRAYIDX5]] to i64
 // CHECK-DAG:    [[TMP34:%.*]] = sub i64 [[TMP32]], [[TMP33]]
@@ -97,51 +103,57 @@ sum = 0.0;
 // CHECK-DAG:    [[TMP36:%.*]] = add nuw i64 [[TMP35]], 1
 // CHECK-DAG:    [[TMP37:%.*]] = mul nuw i64 [[TMP36]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
 // CHECK-DAG:    store i64 [[TMP37]], i64* [[TMP38:%[^,]+]],
-// CHECK-DAG:    [[TMP38]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
-// CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
-// CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
+// CHECK-DAG:    [[TMP38]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
+// CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
+// CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
 // CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_FINI2:.+]] to i8*), i8** [[TMP40]],
-// CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
+// CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB2:.+]] to i8*), i8** [[TMP41]],
-// CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
+// CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, i32* [[TMP42]],
 // CHECK-DAG:    [[TMP44:%.*]] = load float*, float** [[D:%.+]],
 // CHECK-DAG:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8*
 // CHECK-DAG:    store i8* [[TMP45]], i8** [[TMP43:%[^,]+]],
-// CHECK-DAG:    [[TMP43]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 0
-// CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
+// CHECK-DAG:    [[TMP43]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8*
+// CHECK-DAG:    store i8* [[TMP45]], i8** [[TMP43:%[^,]+]],
+// CHECK-DAG:    [[TMP43]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 1
+// CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
 // CHECK-DAG:    store i64 4, i64* [[TMP46]],
-// CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
-// CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
+// CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
+// CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP48]],
-// CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
+// CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB3:.+]] to i8*), i8** [[TMP49]],
-// CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
+// CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 6
 // CHECK-DAG:    [[TMP51:%.*]] = bitcast i32* [[TMP50]] to i8*
 // CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP51]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA:%.+]] to i8*
 // CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
-// CHECK-DAG:    [[TMP52]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP52]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA:%.+]] to i8*
+// CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
+// CHECK-DAG:    [[TMP52]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 1
 // CHECK-DAG:    [[TMP54:%.*]] = mul nuw i64 [[TMP2:%.+]], 4
 // CHECK-DAG:    [[TMP55:%.*]] = udiv exact i64 [[TMP54]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
 // CHECK-DAG:    store i64 [[TMP54]], i64* [[TMP56:%[^,]+]],
-// CHECK-DAG:    [[TMP56]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1
-// CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
-// CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
+// CHECK-DAG:    [[TMP56]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
+// CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
+// CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP58]],
-// CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
+// CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB4:.+]] to i8*), i8** [[TMP59]],
-// CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
+// CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, i32* [[TMP60]],
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK:    [[TMP61:%.*]] = bitcast [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]] to i8*
-// CHECK:    [[TMP62:%.*]] = call i8* @__kmpc_task_reduction_init(i32 [[TMP0:%.+]], i32 4, i8* [[TMP61]])
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK:    [[TMP61:%.*]] = bitcast [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8*
+// CHECK:    [[TMP62:%.*]] = call i8* @__kmpc_taskred_init(i32 [[TMP0:%.+]], i32 4, i8* [[TMP61]])
 // CHECK:    [[TMP63:%.*]] = load i32, i32* [[N:%.+]],
 // CHECK:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_]],
 // CHECK:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]],
@@ -158,7 +170,7 @@ sum = 0.0;
 // CHECK-NEXT:  br label {{%?}}[[EXIT]]
 // CHECK:       [[EXIT]]
 
-// CHECK: define internal void @[[RED_INIT1]](i8* %0)
+// CHECK: define internal void @[[RED_INIT1]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
@@ -167,11 +179,8 @@ sum = 0.0;
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT2]](i8* %0)
+// CHECK: define internal void @[[RED_INIT2]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: call i8* @__kmpc_threadprivate_cached(
-// CHECK: [[ORIG_PTR_ADDR:%.+]] = call i8* @__kmpc_threadprivate_cached(
-// CHECK: [[ORIG_PTR_REF:%.+]] = bitcast i8* [[ORIG_PTR_ADDR]] to i8**
-// CHECK: load i8*, i8** [[ORIG_PTR_REF]],
 // CHECK: call void [[OMP_INIT1:@.+]](
 // CHECK: ret void
 
@@ -191,7 +200,7 @@ sum = 0.0;
 // CHECK: call void [[OMP_COMB1]](
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT3]](i8* %0)
+// CHECK: define internal void @[[RED_INIT3]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
@@ -200,7 +209,7 @@ sum = 0.0;
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT4]](i8* %0)
+// CHECK: define internal void @[[RED_INIT4]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
@@ -214,7 +223,6 @@ sum = 0.0;
 // CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call i8* @__kmpc_task_reduction_get_th_data(
 // CHECK: call i8* @__kmpc_threadprivate_cached(
-// CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call i8* @__kmpc_task_reduction_get_th_data(
 // CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call i8* @__kmpc_task_reduction_get_th_data(

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp
index 15e2b8e7d5e7..03706ff6450b 100644
--- a/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp
@@ -61,7 +61,7 @@ sum = 0.0;
 
 // CHECK:    [[SUM_ADDR:%.*]] = alloca float*,
 // CHECK:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]],
-// CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t],
+// CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_taskred_input_t],
 // CHECK:    alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32,
@@ -72,16 +72,19 @@ sum = 0.0;
 // CHECK:    call void @__kmpc_taskgroup(%struct.ident_t*
 // CHECK-DAG:    [[TMP21:%.*]] = bitcast float* %{{.+}} to i8*
 // CHECK-DAG:    store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
-// CHECK-DAG:    [[TMP20]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T:%.+]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 0
-// CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK-DAG:    [[TMP20]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP21:%.*]] = bitcast float* %{{.+}} to i8*
+// CHECK-DAG:    store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
+// CHECK-DAG:    [[TMP20]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
 // CHECK-DAG:    store i64 4, i64* [[TMP22]],
-// CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
-// CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
+// CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP24]],
-// CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
+// CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB1:.+]] to i8*), i8** [[TMP25]],
-// CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
+// CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK-DAG:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8*
 // CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C:%.+]], i64 0, i64 0
@@ -89,7 +92,10 @@ sum = 0.0;
 // CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 [[LB_ADD_LEN]]
 // CHECK-DAG:    [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8*
 // CHECK-DAG:    store i8* [[TMP31]], i8** [[TMP28:%[^,]+]],
-// CHECK-DAG:    [[TMP28]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP28]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8*
+// CHECK-DAG:    store i8* [[TMP31]], i8** [[TMP28:%[^,]+]],
+// CHECK-DAG:    [[TMP28]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
 // CHECK-DAG:    [[TMP32:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64
 // CHECK-DAG:    [[TMP33:%.*]] = ptrtoint %struct.S* [[ARRAYIDX5]] to i64
 // CHECK-DAG:    [[TMP34:%.*]] = sub i64 [[TMP32]], [[TMP33]]
@@ -97,51 +103,57 @@ sum = 0.0;
 // CHECK-DAG:    [[TMP36:%.*]] = add nuw i64 [[TMP35]], 1
 // CHECK-DAG:    [[TMP37:%.*]] = mul nuw i64 [[TMP36]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
 // CHECK-DAG:    store i64 [[TMP37]], i64* [[TMP38:%[^,]+]],
-// CHECK-DAG:    [[TMP38]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
-// CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
-// CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
+// CHECK-DAG:    [[TMP38]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
+// CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
+// CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
 // CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_FINI2:.+]] to i8*), i8** [[TMP40]],
-// CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
+// CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB2:.+]] to i8*), i8** [[TMP41]],
-// CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
+// CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, i32* [[TMP42]],
 // CHECK-DAG:    [[TMP44:%.*]] = load float*, float** [[D:%.+]],
 // CHECK-DAG:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8*
 // CHECK-DAG:    store i8* [[TMP45]], i8** [[TMP43:%[^,]+]],
-// CHECK-DAG:    [[TMP43]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 0
-// CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
+// CHECK-DAG:    [[TMP43]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8*
+// CHECK-DAG:    store i8* [[TMP45]], i8** [[TMP43:%[^,]+]],
+// CHECK-DAG:    [[TMP43]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
+// CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
 // CHECK-DAG:    store i64 4, i64* [[TMP46]],
-// CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
-// CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
+// CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
+// CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP48]],
-// CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
+// CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB3:.+]] to i8*), i8** [[TMP49]],
-// CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
+// CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 6
 // CHECK-DAG:    [[TMP51:%.*]] = bitcast i32* [[TMP50]] to i8*
 // CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP51]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA:%.+]] to i8*
 // CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
-// CHECK-DAG:    [[TMP52]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP52]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA:%.+]] to i8*
+// CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
+// CHECK-DAG:    [[TMP52]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1
 // CHECK-DAG:    [[TMP54:%.*]] = mul nuw i64 [[TMP2:%.+]], 4
 // CHECK-DAG:    [[TMP55:%.*]] = udiv exact i64 [[TMP54]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
 // CHECK-DAG:    store i64 [[TMP54]], i64* [[TMP56:%[^,]+]],
-// CHECK-DAG:    [[TMP56]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1
-// CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
-// CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
+// CHECK-DAG:    [[TMP56]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
+// CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
+// CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP58]],
-// CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
+// CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB4:.+]] to i8*), i8** [[TMP59]],
-// CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
+// CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, i32* [[TMP60]],
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK:    [[TMP61:%.*]] = bitcast [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]] to i8*
-// CHECK:    [[TMP62:%.*]] = call i8* @__kmpc_task_reduction_init(i32 [[TMP0:%.+]], i32 4, i8* [[TMP61]])
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK:    [[TMP61:%.*]] = bitcast [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8*
+// CHECK:    [[TMP62:%.*]] = call i8* @__kmpc_taskred_init(i32 [[TMP0:%.+]], i32 4, i8* [[TMP61]])
 // CHECK:    [[TMP63:%.*]] = load i32, i32* [[N:%.+]],
 // CHECK:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_]],
 // CHECK:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]],
@@ -158,7 +170,7 @@ sum = 0.0;
 // CHECK-NEXT:  br label {{%?}}[[EXIT]]
 // CHECK:       [[EXIT]]
 
-// CHECK: define internal void @[[RED_INIT1]](i8* %0)
+// CHECK: define internal void @[[RED_INIT1]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
@@ -167,11 +179,8 @@ sum = 0.0;
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT2]](i8* %0)
+// CHECK: define internal void @[[RED_INIT2]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: call i8* @__kmpc_threadprivate_cached(
-// CHECK: [[ORIG_PTR_ADDR:%.+]] = call i8* @__kmpc_threadprivate_cached(
-// CHECK: [[ORIG_PTR_REF:%.+]] = bitcast i8* [[ORIG_PTR_ADDR]] to i8**
-// CHECK: load i8*, i8** [[ORIG_PTR_REF]],
 // CHECK: call void [[OMP_INIT1:@.+]](
 // CHECK: ret void
 
@@ -191,7 +200,7 @@ sum = 0.0;
 // CHECK: call void [[OMP_COMB1]](
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT3]](i8* %0)
+// CHECK: define internal void @[[RED_INIT3]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
@@ -200,7 +209,7 @@ sum = 0.0;
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT4]](i8* %0)
+// CHECK: define internal void @[[RED_INIT4]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
@@ -214,7 +223,6 @@ sum = 0.0;
 // CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call i8* @__kmpc_task_reduction_get_th_data(
 // CHECK: call i8* @__kmpc_threadprivate_cached(
-// CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call i8* @__kmpc_task_reduction_get_th_data(
 // CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
 // CHECK: call i8* @__kmpc_task_reduction_get_th_data(

diff  --git a/clang/test/OpenMP/task_in_reduction_codegen.cpp b/clang/test/OpenMP/task_in_reduction_codegen.cpp
index 07a701d84ad6..29051aa3702d 100644
--- a/clang/test/OpenMP/task_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/task_in_reduction_codegen.cpp
@@ -51,10 +51,10 @@ int main(int argc, char **argv) {
 
 // CHECK-LABEL: @main
 // CHECK:       void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID:%.+]])
-// CHECK:       [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* %
+// CHECK:       [[TD1:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 3, i8* %
 // CHECK-NEXT:  store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]],
 // CHECK-NEXT:  call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID]])
-// CHECK:       [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* %
+// CHECK:       [[TD2:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 2, i8* %
 // CHECK-NEXT:  store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]],
 // CHECK-NEXT:  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]])
 // CHECK-NEXT:  call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]])

diff  --git a/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp b/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp
index 05298deccc8e..24f433bc6888 100644
--- a/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp
+++ b/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp
@@ -65,91 +65,106 @@ int main(int argc, char **argv) {
 // CHECK-DAG:   [[BC_A:%.+]] = bitcast i32* [[A]] to i8*
 // CHECK-DAG:   store i8* [[BC_A]], i8** [[A_REF:[^,]+]],
 // CHECK-DAG:   [[A_REF]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA:%[^,]+]], i32 0, i32 0
+// CHECK-DAG:   [[BC_A:%.+]] = bitcast i32* [[A]] to i8*
+// CHECK-DAG:   store i8* [[BC_A]], i8** [[A_REF:[^,]+]],
+// CHECK-DAG:   [[A_REF]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 1
 // CHECK-DAG:   [[GEPA]] = getelementptr inbounds [3 x [[T1]]], [3 x [[T1]]]* [[RD_IN1]], i64 0, i64
-// CHECK-DAG:   [[TMP6:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 1
+// CHECK-DAG:   [[TMP6:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 2
 // CHECK-DAG:   store i64 4, i64* [[TMP6]],
-// CHECK-DAG:   [[TMP7:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 2
-// CHECK-DAG:   store i8* bitcast (void (i8*)* @[[AINIT:.+]] to i8*), i8** [[TMP7]],
-// CHECK-DAG:   [[TMP8:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 3
+// CHECK-DAG:   [[TMP7:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 3
+// CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* @[[AINIT:.+]] to i8*), i8** [[TMP7]],
+// CHECK-DAG:   [[TMP8:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 4
 // CHECK-DAG:   store i8* null, i8** [[TMP8]],
-// CHECK-DAG:   [[TMP9:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 4
+// CHECK-DAG:   [[TMP9:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 5
 // CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* @[[ACOMB:.+]] to i8*), i8** [[TMP9]],
-// CHECK-DAG:   [[TMP10:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 5
+// CHECK-DAG:   [[TMP10:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPA]], i32 0, i32 6
 // CHECK-DAG:   [[TMP11:%.+]] = bitcast i32* [[TMP10]] to i8*
 // CHECK-DAG:   call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP11]], i8 0, i64 4, i1 false)
 // CHECK-DAG:   [[TMP13:%.+]] = bitcast float* [[B]] to i8*
 // CHECK-DAG:   store i8* [[TMP13]], i8** [[TMP12:%[^,]+]],
 // CHECK-DAG:   [[TMP12]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB:%[^,]+]], i32 0, i32 0
+// CHECK-DAG:   [[TMP13:%.+]] = bitcast float* [[B]] to i8*
+// CHECK-DAG:   store i8* [[TMP13]], i8** [[TMP12:%[^,]+]],
+// CHECK-DAG:   [[TMP12]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 1
 // CHECK-DAG:   [[GEPB]] = getelementptr inbounds [3 x [[T1]]], [3 x [[T1]]]* [[RD_IN1]], i64 0, i64
-// CHECK-DAG:   [[TMP14:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 1
+// CHECK-DAG:   [[TMP14:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 2
 // CHECK-DAG:   store i64 4, i64* [[TMP14]],
-// CHECK-DAG:   [[TMP15:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 2
-// CHECK-DAG:   store i8* bitcast (void (i8*)* @[[BINIT:.+]] to i8*), i8** [[TMP15]],
-// CHECK-DAG:   [[TMP16:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 3
+// CHECK-DAG:   [[TMP15:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 3
+// CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* @[[BINIT:.+]] to i8*), i8** [[TMP15]],
+// CHECK-DAG:   [[TMP16:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 4
 // CHECK-DAG:   store i8* null, i8** [[TMP16]],
-// CHECK-DAG:   [[TMP17:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 4
+// CHECK-DAG:   [[TMP17:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 5
 // CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* @[[BCOMB:.+]] to i8*), i8** [[TMP17]],
-// CHECK-DAG:   [[TMP18:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 5
+// CHECK-DAG:   [[TMP18:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPB]], i32 0, i32 6
 // CHECK-DAG:   [[TMP19:%.+]] = bitcast i32* [[TMP18]] to i8*
 // CHECK-DAG:   call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP19]], i8 0, i64 4, i1 false)
 // CHECK-DAG:   [[TMP21:%.+]] = bitcast i32* [[ARGC_ADDR]] to i8*
 // CHECK-DAG:   store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
 // CHECK-DAG:   [[TMP20]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC:%[^,]+]], i32 0, i32 0
+// CHECK-DAG:   [[TMP21:%.+]] = bitcast i32* [[ARGC_ADDR]] to i8*
+// CHECK-DAG:   store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
+// CHECK-DAG:   [[TMP20]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 1
 // CHECK-DAG:   [[GEPARGC]] = getelementptr inbounds [3 x [[T1]]], [3 x [[T1]]]* [[RD_IN1]], i64 0, i64
-// CHECK-DAG:   [[TMP22:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 1
+// CHECK-DAG:   [[TMP22:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 2
 // CHECK-DAG:   store i64 4, i64* [[TMP22]],
-// CHECK-DAG:   [[TMP23:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 2
-// CHECK-DAG:   store i8* bitcast (void (i8*)* @[[ARGCINIT:.+]] to i8*), i8** [[TMP23]],
-// CHECK-DAG:   [[TMP24:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 3
+// CHECK-DAG:   [[TMP23:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 3
+// CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* @[[ARGCINIT:.+]] to i8*), i8** [[TMP23]],
+// CHECK-DAG:   [[TMP24:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 4
 // CHECK-DAG:   store i8* null, i8** [[TMP24]],
-// CHECK-DAG:   [[TMP25:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 4
+// CHECK-DAG:   [[TMP25:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 5
 // CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* @[[ARGCCOMB:.+]] to i8*), i8** [[TMP25]],
-// CHECK-DAG:   [[TMP26:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 5
+// CHECK-DAG:   [[TMP26:%.+]] = getelementptr inbounds [[T1]], [[T1]]* [[GEPARGC]], i32 0, i32 6
 // CHECK-DAG:   [[TMP27:%.+]] = bitcast i32* [[TMP26]] to i8*
 // CHECK-DAG:   call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false)
 // CHECK-DAG:   [[TMP28:%.+]] = bitcast [3 x [[T1]]]* [[RD_IN1]] to i8*
-// CHECK-DAG:   [[TMP29:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* [[TMP28]])
+// CHECK-DAG:   [[TMP29:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 3, i8* [[TMP28]])
 // DEBUG-DAG:   call void @llvm.dbg.declare(metadata i8** [[TD1]],
 // CHECK-DAG:   store i8* [[TMP29]], i8** [[TD1]],
 // CHECK-DAG:   call void @__kmpc_taskgroup(%struct.ident_t* {{[^,]+}}, i32 [[GTID]])
 // CHECK-DAG:   [[TMP31:%.+]] = bitcast [5 x %struct.S]* [[C]] to i8*
 // CHECK-DAG:   store i8* [[TMP31]], i8** [[TMP30:%[^,]+]],
 // CHECK-DAG:   [[TMP30]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC:%[^,]+]], i32 0, i32 0
+// CHECK-DAG:   [[TMP31:%.+]] = bitcast [5 x %struct.S]* [[C]] to i8*
+// CHECK-DAG:   store i8* [[TMP31]], i8** [[TMP30:%[^,]+]],
+// CHECK-DAG:   [[TMP30]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 1
 // CHECK-DAG:   [[GEPC]] = getelementptr inbounds [2 x [[T2]]], [2 x [[T2]]]* [[RD_IN2]], i64 0, i64
-// CHECK-DAG:   [[TMP32:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 1
+// CHECK-DAG:   [[TMP32:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 2
 // CHECK-DAG:   store i64 20, i64* [[TMP32]],
-// CHECK-DAG:   [[TMP33:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 2
-// CHECK-DAG:   store i8* bitcast (void (i8*)* @[[CINIT:.+]] to i8*), i8** [[TMP33]],
-// CHECK-DAG:   [[TMP34:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 3
+// CHECK-DAG:   [[TMP33:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 3
+// CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* @[[CINIT:.+]] to i8*), i8** [[TMP33]],
+// CHECK-DAG:   [[TMP34:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 4
 // CHECK-DAG:   store i8* bitcast (void (i8*)* @[[CFINI:.+]] to i8*), i8** [[TMP34]],
-// CHECK-DAG:   [[TMP35:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 4
+// CHECK-DAG:   [[TMP35:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 5
 // CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* @[[CCOMB:.+]] to i8*), i8** [[TMP35]],
-// CHECK-DAG:   [[TMP36:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 5
+// CHECK-DAG:   [[TMP36:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPC]], i32 0, i32 6
 // CHECK-DAG:   [[TMP37:%.+]] = bitcast i32* [[TMP36]] to i8*
 // CHECK-DAG:   call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP37]], i8 0, i64 4, i1 false)
 // CHECK-DAG:   [[TMP39:%.+]] = bitcast i16* [[VLA]] to i8*
 // CHECK-DAG:   store i8* [[TMP39]], i8** [[TMP38:%[^,]+]],
 // CHECK-DAG:   [[TMP38]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA:%[^,]+]], i32 0, i32 0
+// CHECK-DAG:   [[TMP39:%.+]] = bitcast i16* [[VLA]] to i8*
+// CHECK-DAG:   store i8* [[TMP39]], i8** [[TMP38:%[^,]+]],
+// CHECK-DAG:   [[TMP38]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 1
 // CHECK-DAG:   [[GEPVLA]] = getelementptr inbounds [2 x [[T2]]], [2 x [[T2]]]* [[RD_IN2]], i64 0, i64
 // CHECK-DAG:   [[TMP40:%.+]] = mul nuw i64 [[VLA_SIZE]], 2
 // CHECK-DAG:   [[TMP41:%.+]] = udiv exact i64 [[TMP40]], ptrtoint (i16* getelementptr (i16, i16* null, i32 1) to i64)
-// CHECK-DAG:   [[TMP42:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 1
+// CHECK-DAG:   [[TMP42:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 2
 // CHECK-DAG:   store i64 [[TMP40]], i64* [[TMP42]],
-// CHECK-DAG:   [[TMP43:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 2
-// CHECK-DAG:   store i8* bitcast (void (i8*)* @[[VLAINIT:.+]] to i8*), i8** [[TMP43]],
-// CHECK-DAG:   [[TMP44:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 3
+// CHECK-DAG:   [[TMP43:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 3
+// CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* @[[VLAINIT:.+]] to i8*), i8** [[TMP43]],
+// CHECK-DAG:   [[TMP44:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 4
 // CHECK-DAG:   store i8* null, i8** [[TMP44]],
-// CHECK-DAG:   [[TMP45:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 4
+// CHECK-DAG:   [[TMP45:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 5
 // CHECK-DAG:   store i8* bitcast (void (i8*, i8*)* @[[VLACOMB:.+]] to i8*), i8** [[TMP45]],
-// CHECK-DAG:   [[TMP46:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 5
+// CHECK-DAG:   [[TMP46:%.+]] = getelementptr inbounds [[T2]], [[T2]]* [[GEPVLA]], i32 0, i32 6
 // CHECK-DAG:   store i32 1, i32* [[TMP46]],
 // CHECK:       [[TMP47:%.+]] = bitcast [2 x [[T2]]]* [[RD_IN2]] to i8*
-// CHECK:       [[TMP48:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* [[TMP47]])
+// CHECK:       [[TMP48:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 2, i8* [[TMP47]])
 // CHECK:       store i8* [[TMP48]], i8** [[TD2]],
 // CHECK:       call void @__kmpc_end_taskgroup(%struct.ident_t* {{[^,]+}}, i32 [[GTID]])
 // CHECK:       call void @__kmpc_end_taskgroup(%struct.ident_t* {{[^,]+}}, i32 [[GTID]])
 
-// CHECK-DAG: define internal void @[[AINIT]](i8* %0)
+// CHECK-DAG: define internal void @[[AINIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK-DAG: store i32 0, i32* %
 // CHECK-DAG: ret void
 // CHECK-DAG: }
@@ -160,7 +175,7 @@ int main(int argc, char **argv) {
 // CHECK-DAG: ret void
 // CHECK-DAG: }
 
-// CHECK-DAG: define internal void @[[BINIT]](i8* %0)
+// CHECK-DAG: define internal void @[[BINIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK-DAG: store float 0.000000e+00, float* %
 // CHECK-DAG: ret void
 // CHECK-DAG: }
@@ -171,7 +186,7 @@ int main(int argc, char **argv) {
 // CHECK-DAG: ret void
 // CHECK-DAG: }
 
-// CHECK-DAG: define internal void @[[ARGCINIT]](i8* %0)
+// CHECK-DAG: define internal void @[[ARGCINIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK-DAG: store i32 0, i32* %
 // CHECK-DAG: ret void
 // CHECK-DAG: }
@@ -182,7 +197,7 @@ int main(int argc, char **argv) {
 // CHECK-DAG: ret void
 // CHECK-DAG: }
 
-// CHECK-DAG: define internal void @[[CINIT]](i8* %0)
+// CHECK-DAG: define internal void @[[CINIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK-DAG: phi %struct.S* [
 // CHECK-DAG: call {{.+}}(%struct.S* {{.+}})
 // CHECK-DAG: br i1 %
@@ -206,7 +221,7 @@ int main(int argc, char **argv) {
 // CHECK-DAG: ret void
 // CHECK_DAG: }
 
-// CHECK-DAG: define internal void @[[VLAINIT]](i8* %0)
+// CHECK-DAG: define internal void @[[VLAINIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK-DAG: call i32 @__kmpc_global_thread_num(%struct.ident_t* {{[^,]+}})
 // CHECK-DAG: call i8* @__kmpc_threadprivate_cached(%struct.ident_t*
 // CHECK-DAG: phi i16* [

diff  --git a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
index dbb34df1b429..e648c2df50b8 100644
--- a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp
@@ -40,10 +40,10 @@ int main(int argc, char **argv) {
 
 // CHECK-LABEL: @main
 // CHECK:       void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID:%.+]])
-// CHECK:       [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* %
+// CHECK:       [[TD1:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 3, i8* %
 // CHECK-NEXT:  store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]],
 // CHECK-NEXT:  call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID]])
-// CHECK:       [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* %
+// CHECK:       [[TD2:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 2, i8* %
 // CHECK-NEXT:  store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]],
 // CHECK-NEXT:  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]])
 // CHECK-NEXT:  call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]])

diff  --git a/clang/test/OpenMP/taskloop_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_reduction_codegen.cpp
index 69f61140bc7c..919feeba41e8 100644
--- a/clang/test/OpenMP/taskloop_reduction_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_reduction_codegen.cpp
@@ -5,7 +5,6 @@
 // expected-no-diagnostics
 
 // CHECK: [[RED_SIZE1:@reduction_size[.].+]] = common thread_local global i64 0
-// CHECK: [[RED1:@reduction[.].+]] = common thread_local global i8* null
 // CHECK: [[RED_SIZE2:@reduction_size[.].+]] = common thread_local global i64 0
 
 struct S {
@@ -56,7 +55,7 @@ sum = 0.0;
 // CHECK:    [[C:%.*]] = alloca [100 x %struct.S],
 // CHECK:    [[D:%.*]] = alloca float*,
 // CHECK:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]],
-// CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t],
+// CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_taskred_input_t],
 // CHECK:    alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32,
@@ -72,16 +71,19 @@ sum = 0.0;
 // CHECK:    call void @__kmpc_taskgroup(%struct.ident_t*
 // CHECK-DAG:    [[TMP21:%.*]] = bitcast float* [[SUM]] to i8*
 // CHECK-DAG:    store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
-// CHECK-DAG:    [[TMP20]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T:%.+]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 0
-// CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK-DAG:    [[TMP20]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP21:%.*]] = bitcast float* [[SUM]] to i8*
+// CHECK-DAG:    store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
+// CHECK-DAG:    [[TMP20]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
 // CHECK-DAG:    store i64 4, i64* [[TMP22]],
-// CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
-// CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
+// CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP24]],
-// CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
+// CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB1:.+]] to i8*), i8** [[TMP25]],
-// CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
+// CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK-DAG:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8*
 // CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 0
@@ -89,7 +91,10 @@ sum = 0.0;
 // CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 [[LB_ADD_LEN]]
 // CHECK-DAG:    [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8*
 // CHECK-DAG:    store i8* [[TMP31]], i8** [[TMP28:%[^,]+]],
-// CHECK-DAG:    [[TMP28]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP28]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8*
+// CHECK-DAG:    store i8* [[TMP31]], i8** [[TMP28:%[^,]+]],
+// CHECK-DAG:    [[TMP28]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
 // CHECK-DAG:    [[TMP32:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64
 // CHECK-DAG:    [[TMP33:%.*]] = ptrtoint %struct.S* [[ARRAYIDX5]] to i64
 // CHECK-DAG:    [[TMP34:%.*]] = sub i64 [[TMP32]], [[TMP33]]
@@ -97,51 +102,57 @@ sum = 0.0;
 // CHECK-DAG:    [[TMP36:%.*]] = add nuw i64 [[TMP35]], 1
 // CHECK-DAG:    [[TMP37:%.*]] = mul nuw i64 [[TMP36]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
 // CHECK-DAG:    store i64 [[TMP37]], i64* [[TMP38:%[^,]+]],
-// CHECK-DAG:    [[TMP38]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
-// CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
-// CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
+// CHECK-DAG:    [[TMP38]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
+// CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
+// CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
 // CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_FINI2:.+]] to i8*), i8** [[TMP40]],
-// CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
+// CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB2:.+]] to i8*), i8** [[TMP41]],
-// CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
+// CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, i32* [[TMP42]],
 // CHECK-DAG:    [[TMP44:%.*]] = load float*, float** [[D]],
 // CHECK-DAG:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8*
 // CHECK-DAG:    store i8* [[TMP45]], i8** [[TMP43:%[^,]+]],
-// CHECK-DAG:    [[TMP43]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 0
-// CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
+// CHECK-DAG:    [[TMP43]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8*
+// CHECK-DAG:    store i8* [[TMP45]], i8** [[TMP43:%[^,]+]],
+// CHECK-DAG:    [[TMP43]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
+// CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
 // CHECK-DAG:    store i64 4, i64* [[TMP46]],
-// CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
-// CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
+// CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
+// CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP48]],
-// CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
+// CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB3:.+]] to i8*), i8** [[TMP49]],
-// CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
+// CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 6
 // CHECK-DAG:    [[TMP51:%.*]] = bitcast i32* [[TMP50]] to i8*
 // CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP51]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA]] to i8*
 // CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
-// CHECK-DAG:    [[TMP52]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP52]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA]] to i8*
+// CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
+// CHECK-DAG:    [[TMP52]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1
 // CHECK-DAG:    [[TMP54:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK-DAG:    [[TMP55:%.*]] = udiv exact i64 [[TMP54]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
 // CHECK-DAG:    store i64 [[TMP54]], i64* [[TMP56:%[^,]+]],
-// CHECK-DAG:    [[TMP56]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1
-// CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
-// CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
+// CHECK-DAG:    [[TMP56]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
+// CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
+// CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP58]],
-// CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
+// CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB4:.+]] to i8*), i8** [[TMP59]],
-// CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
+// CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, i32* [[TMP60]],
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK:    [[TMP61:%.*]] = bitcast [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]] to i8*
-// CHECK:    [[TMP62:%.*]] = call i8* @__kmpc_task_reduction_init(i32 [[TMP0]], i32 4, i8* [[TMP61]])
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK:    [[TMP61:%.*]] = bitcast [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8*
+// CHECK:    [[TMP62:%.*]] = call i8* @__kmpc_taskred_init(i32 [[TMP0]], i32 4, i8* [[TMP61]])
 // CHECK:    [[TMP63:%.*]] = load i32, i32* [[N]],
 // CHECK:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_]],
 // CHECK:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]],
@@ -157,7 +168,7 @@ sum = 0.0;
 
 // CHECK:    ret i32
 
-// CHECK: define internal void @[[RED_INIT1]](i8* %0)
+// CHECK: define internal void @[[RED_INIT1]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
@@ -166,9 +177,8 @@ sum = 0.0;
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT2]](i8* %0)
-// CHECK: load i8*, i8** [[RED1]],
-// CHECK: call void [[OMP_INIT1:@.+]](
+// CHECK: define internal void @[[RED_INIT2]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: call void [[OMP_INIT1:@.+]](%struct.S*
 // CHECK: ret void
 
 // CHECK: define internal void [[OMP_COMB1:@.+]](%struct.S* noalias %0, %struct.S* noalias %1)
@@ -187,7 +197,7 @@ sum = 0.0;
 // CHECK: call void [[OMP_COMB1]](
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT3]](i8* %0)
+// CHECK: define internal void @[[RED_INIT3]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
@@ -196,7 +206,7 @@ sum = 0.0;
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT4]](i8* %0)
+// CHECK: define internal void @[[RED_INIT4]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: load i64, i64* [[RED_SIZE2]]
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void

diff  --git a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
index 923b100ad104..4640b44cbe93 100644
--- a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp
@@ -40,10 +40,10 @@ int main(int argc, char **argv) {
 
 // CHECK-LABEL: @main
 // CHECK:       void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID:%.+]])
-// CHECK:       [[TD1:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 3, i8* %
+// CHECK:       [[TD1:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 3, i8* %
 // CHECK-NEXT:  store i8* [[TD1]], i8** [[TD1_ADDR:%[^,]+]],
 // CHECK-NEXT:  call void @__kmpc_taskgroup(%struct.ident_t* @0, i32 [[GTID]])
-// CHECK:       [[TD2:%.+]] = call i8* @__kmpc_task_reduction_init(i32 [[GTID]], i32 2, i8* %
+// CHECK:       [[TD2:%.+]] = call i8* @__kmpc_taskred_init(i32 [[GTID]], i32 2, i8* %
 // CHECK-NEXT:  store i8* [[TD2]], i8** [[TD2_ADDR:%[^,]+]],
 // CHECK-NEXT:  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i16*, i8**, i8**)* [[OMP_PARALLEL:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i64 %{{.+}}, i16* %{{.+}}, i8** [[TD1_ADDR]], i8** [[TD2_ADDR]])
 // CHECK-NEXT:  call void @__kmpc_end_taskgroup(%struct.ident_t* @0, i32 [[GTID]])

diff  --git a/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp
index 8f443f71e41c..4bd1fd477009 100644
--- a/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_simd_reduction_codegen.cpp
@@ -52,7 +52,7 @@ sum = 0.0;
 // CHECK:    [[C:%.*]] = alloca [100 x %struct.S],
 // CHECK:    [[D:%.*]] = alloca float*,
 // CHECK:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]],
-// CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t],
+// CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_taskred_input_t],
 // CHECK:    alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32,
 // CHECK:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32,
@@ -68,16 +68,19 @@ sum = 0.0;
 // CHECK:    call void @__kmpc_taskgroup(%struct.ident_t*
 // CHECK-DAG:    [[TMP21:%.*]] = bitcast float* [[SUM]] to i8*
 // CHECK-DAG:    store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
-// CHECK-DAG:    [[TMP20]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T:%.+]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 0
-// CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK-DAG:    [[TMP20]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP21:%.*]] = bitcast float* [[SUM]] to i8*
+// CHECK-DAG:    store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
+// CHECK-DAG:    [[TMP20]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
 // CHECK-DAG:    store i64 4, i64* [[TMP22]],
-// CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
-// CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
+// CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP24]],
-// CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
+// CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB1:.+]] to i8*), i8** [[TMP25]],
-// CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
+// CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 6
 // CHECK-DAG:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8*
 // CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 0
@@ -85,7 +88,10 @@ sum = 0.0;
 // CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 [[LB_ADD_LEN]]
 // CHECK-DAG:    [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8*
 // CHECK-DAG:    store i8* [[TMP31]], i8** [[TMP28:%[^,]+]],
-// CHECK-DAG:    [[TMP28]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP28]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8*
+// CHECK-DAG:    store i8* [[TMP31]], i8** [[TMP28:%[^,]+]],
+// CHECK-DAG:    [[TMP28]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
 // CHECK-DAG:    [[TMP32:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64
 // CHECK-DAG:    [[TMP33:%.*]] = ptrtoint %struct.S* [[ARRAYIDX5]] to i64
 // CHECK-DAG:    [[TMP34:%.*]] = sub i64 [[TMP32]], [[TMP33]]
@@ -93,51 +99,57 @@ sum = 0.0;
 // CHECK-DAG:    [[TMP36:%.*]] = add nuw i64 [[TMP35]], 1
 // CHECK-DAG:    [[TMP37:%.*]] = mul nuw i64 [[TMP36]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
 // CHECK-DAG:    store i64 [[TMP37]], i64* [[TMP38:%[^,]+]],
-// CHECK-DAG:    [[TMP38]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
-// CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
-// CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
+// CHECK-DAG:    [[TMP38]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
+// CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
+// CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
 // CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_FINI2:.+]] to i8*), i8** [[TMP40]],
-// CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
+// CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB2:.+]] to i8*), i8** [[TMP41]],
-// CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
+// CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, i32* [[TMP42]],
 // CHECK-DAG:    [[TMP44:%.*]] = load float*, float** [[D]],
 // CHECK-DAG:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8*
 // CHECK-DAG:    store i8* [[TMP45]], i8** [[TMP43:%[^,]+]],
-// CHECK-DAG:    [[TMP43]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 0
-// CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
+// CHECK-DAG:    [[TMP43]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8*
+// CHECK-DAG:    store i8* [[TMP45]], i8** [[TMP43:%[^,]+]],
+// CHECK-DAG:    [[TMP43]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
+// CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
 // CHECK-DAG:    store i64 4, i64* [[TMP46]],
-// CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
-// CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
+// CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
+// CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP48]],
-// CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
+// CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB3:.+]] to i8*), i8** [[TMP49]],
-// CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
+// CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 6
 // CHECK-DAG:    [[TMP51:%.*]] = bitcast i32* [[TMP50]] to i8*
 // CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP51]], i8 0, i64 4, i1 false)
 // CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA]] to i8*
 // CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
-// CHECK-DAG:    [[TMP52]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP52]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA]] to i8*
+// CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
+// CHECK-DAG:    [[TMP52]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1
 // CHECK-DAG:    [[TMP54:%.*]] = mul nuw i64 [[TMP2]], 4
 // CHECK-DAG:    [[TMP55:%.*]] = udiv exact i64 [[TMP54]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
 // CHECK-DAG:    store i64 [[TMP54]], i64* [[TMP56:%[^,]+]],
-// CHECK-DAG:    [[TMP56]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1
-// CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
-// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
-// CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
+// CHECK-DAG:    [[TMP56]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
+// CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
+// CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
 // CHECK-DAG:    store i8* null, i8** [[TMP58]],
-// CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
+// CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
 // CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB4:.+]] to i8*), i8** [[TMP59]],
-// CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
+// CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 6
 // CHECK-DAG:    store i32 1, i32* [[TMP60]],
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
-// CHECK:    [[TMP61:%.*]] = bitcast [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]] to i8*
-// CHECK:    [[TMP62:%.*]] = call i8* @__kmpc_task_reduction_init(i32 [[TMP0]], i32 4, i8* [[TMP61]])
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_taskred_input_t], [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK:    [[TMP61:%.*]] = bitcast [4 x %struct.kmp_taskred_input_t]* [[DOTRD_INPUT_]] to i8*
+// CHECK:    [[TMP62:%.*]] = call i8* @__kmpc_taskred_init(i32 [[TMP0]], i32 4, i8* [[TMP61]])
 // CHECK:    [[TMP63:%.*]] = load i32, i32* [[N]],
 // CHECK:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_]],
 // CHECK:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]],
@@ -153,7 +165,7 @@ sum = 0.0;
 
 // CHECK:    ret i32
 
-// CHECK: define internal void @[[RED_INIT1]](i8* %0)
+// CHECK: define internal void @[[RED_INIT1]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
@@ -162,10 +174,9 @@ sum = 0.0;
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT2]](i8* %0)
-// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: define internal void @[[RED_INIT2]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: call i8* @__kmpc_threadprivate_cached(
-// CHECK: call void [[OMP_INIT1:@.+]](
+// CHECK: call void [[OMP_INIT1:@.+]](%struct.S*
 // CHECK: ret void
 
 // CHECK: define internal void [[OMP_COMB1:@.+]](%struct.S* noalias %0, %struct.S* noalias %1)
@@ -184,7 +195,7 @@ sum = 0.0;
 // CHECK: call void [[OMP_COMB1]](
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT3]](i8* %0)
+// CHECK: define internal void @[[RED_INIT3]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void
 
@@ -193,7 +204,7 @@ sum = 0.0;
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
-// CHECK: define internal void @[[RED_INIT4]](i8* %0)
+// CHECK: define internal void @[[RED_INIT4]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
 // CHECK: call i8* @__kmpc_threadprivate_cached(
 // CHECK: store float 0.000000e+00, float* %
 // CHECK: ret void


        


More information about the cfe-commits mailing list